[llvm] r341988 - [Support] Avoid calling CommandLineToArgvW from shell32.dll

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 11 13:22:39 PDT 2018


Author: rnk
Date: Tue Sep 11 13:22:39 2018
New Revision: 341988

URL: http://llvm.org/viewvc/llvm-project?rev=341988&view=rev
Log:
[Support] Avoid calling CommandLineToArgvW from shell32.dll

Summary:
Shell32.dll depends on gdi32.dll and user32.dll, which are mostly DLLs
for Windows GUI functionality. LLVM's utilities don't typically need GUI
functionality, and loading these DLLs seems to be slowing down startup.
Also, we already have an implementation of Windows command line
tokenization in cl::TokenizeWindowsCommandLine, so we can just use it.

The goal is to get the original argv in UTF-8, so that it can pass
through most LLVM string APIs. A Windows process starts life with a
UTF-16 string for its command line, and it can be retrieved with
GetCommandLineW from kernel32.dll.

Previously, we would:
1. Get the wide command line
2. Call CommandLineToArgvW to handle quoting rules and separate it into
   arguments.
3. For each wide argument, expand wildcards (* and ?) using
   FindFirstFileW.
4. Convert each argument to UTF-8

Now we:
1. Get the wide command line, convert the whole thing to UTF-8
2. Tokenize the UTF-8 command line with cl::TokenizeWindowsCommandLine
3. For each argument, expand wildcards if present
   - This requires converting back to UTF-16 to call FindFirstFileW
   - Results of FindFirstFileW must be converted back to UTF-8

Reviewers: zturner

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D51941

Modified:
    llvm/trunk/lib/Support/Windows/Process.inc

Modified: llvm/trunk/lib/Support/Windows/Process.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Process.inc?rev=341988&r1=341987&r2=341988&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Process.inc (original)
+++ llvm/trunk/lib/Support/Windows/Process.inc Tue Sep 11 13:22:39 2018
@@ -12,8 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/WindowsError.h"
 #include <malloc.h>
 
@@ -140,73 +142,59 @@ Optional<std::string> Process::GetEnv(St
   return std::string(Res.data());
 }
 
-static const char *AllocateString(const SmallVectorImpl<char> &S,
-                                  BumpPtrAllocator &Alloc) {
-  char *Buf = reinterpret_cast<char *>(Alloc.Allocate(S.size() + 1, 1));
-  ::memcpy(Buf, S.data(), S.size());
-  Buf[S.size()] = '\0';
-  return Buf;
-}
-
-/// Convert Arg from UTF-16 to UTF-8 and push it onto Args.
-static std::error_code ConvertAndPushArg(const wchar_t *Arg,
-                                         SmallVectorImpl<const char *> &Args,
-                                         BumpPtrAllocator &Alloc) {
-  SmallVector<char, MAX_PATH> ArgString;
-  if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), ArgString))
-    return ec;
-  Args.push_back(AllocateString(ArgString, Alloc));
-  return std::error_code();
-}
-
-/// Perform wildcard expansion of Arg, or just push it into Args if it
-/// doesn't have wildcards or doesn't match any files.
-static std::error_code WildcardExpand(const wchar_t *Arg,
+/// Perform wildcard expansion of Arg, or just push it into Args if it doesn't
+/// have wildcards or doesn't match any files.
+static std::error_code WildcardExpand(StringRef Arg,
                                       SmallVectorImpl<const char *> &Args,
-                                      BumpPtrAllocator &Alloc) {
-  if (!wcspbrk(Arg, L"*?")) {
-    // Arg does not contain any wildcard characters. This is the common case.
-    return ConvertAndPushArg(Arg, Args, Alloc);
-  }
+                                      StringSaver &Saver) {
+  std::error_code EC;
 
-  if (wcscmp(Arg, L"/?") == 0 || wcscmp(Arg, L"-?") == 0) {
-    // Don't wildcard expand /?. Always treat it as an option.
-    return ConvertAndPushArg(Arg, Args, Alloc);
+  // Don't expand Arg if it does not contain any wildcard characters. This is
+  // the common case. Also don't wildcard expand /?. Always treat it as an
+  // option.
+  if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" ||
+      Arg == "-?") {
+    Args.push_back(Arg.data());
+    return EC;
   }
 
-  // Extract any directory part of the argument.
-  SmallVector<char, MAX_PATH> Dir;
-  if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), Dir))
-    return ec;
-  sys::path::remove_filename(Dir);
-  const int DirSize = Dir.size();
+  // Convert back to UTF-16 so we can call FindFirstFileW.
+  SmallVector<wchar_t, MAX_PATH> ArgW;
+  EC = windows::UTF8ToUTF16(Arg, ArgW);
+  if (EC)
+    return EC;
 
   // Search for matching files.
   // FIXME:  This assumes the wildcard is only in the file name and not in the
   // directory portion of the file path.  For example, it doesn't handle
   // "*\foo.c" nor "s?c\bar.cpp".
   WIN32_FIND_DATAW FileData;
-  HANDLE FindHandle = FindFirstFileW(Arg, &FileData);
+  HANDLE FindHandle = FindFirstFileW(ArgW.data(), &FileData);
   if (FindHandle == INVALID_HANDLE_VALUE) {
-    return ConvertAndPushArg(Arg, Args, Alloc);
+    Args.push_back(Arg.data());
+    return EC;
   }
 
-  std::error_code ec;
+  // Extract any directory part of the argument.
+  SmallString<MAX_PATH> Dir = Arg;
+  sys::path::remove_filename(Dir);
+  const int DirSize = Dir.size();
+
   do {
-    SmallVector<char, MAX_PATH> FileName;
-    ec = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
+    SmallString<MAX_PATH> FileName;
+    EC = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
                               FileName);
-    if (ec)
+    if (EC)
       break;
 
     // Append FileName to Dir, and remove it afterwards.
-    llvm::sys::path::append(Dir, StringRef(FileName.data(), FileName.size()));
-    Args.push_back(AllocateString(Dir, Alloc));
+    llvm::sys::path::append(Dir, FileName);
+    Args.push_back(Saver.save(StringRef(Dir)).data());
     Dir.resize(DirSize);
   } while (FindNextFileW(FindHandle, &FileData));
 
   FindClose(FindHandle);
-  return ec;
+  return EC;
 }
 
 static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) {
@@ -243,18 +231,20 @@ static std::error_code GetExecutableName
 std::error_code
 windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
                                  BumpPtrAllocator &Alloc) {
-  int ArgCount;
-  std::unique_ptr<wchar_t *[], decltype(&LocalFree)> UnicodeCommandLine{
-    CommandLineToArgvW(GetCommandLineW(), &ArgCount), &LocalFree};
-  if (!UnicodeCommandLine)
-    return mapWindowsError(::GetLastError());
-
+  const wchar_t *CmdW = GetCommandLineW();
+  assert(CmdW);
   std::error_code EC;
+  SmallString<MAX_PATH> Cmd;
+  EC = windows::UTF16ToUTF8(CmdW, wcslen(CmdW), Cmd);
+  if (EC)
+    return EC;
 
-  Args.reserve(ArgCount);
+  SmallVector<const char *, 20> TmpArgs;
+  StringSaver Saver(Alloc);
+  cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
 
-  for (int I = 0; I < ArgCount; ++I) {
-    EC = WildcardExpand(UnicodeCommandLine[I], Args, Alloc);
+  for (const char *Arg : TmpArgs) {
+    EC = WildcardExpand(Arg, Args, Saver);
     if (EC)
       return EC;
   }
@@ -266,7 +256,7 @@ windows::GetCommandLineArguments(SmallVe
   if (EC)
     return EC;
   sys::path::append(Arg0, Filename);
-  Args[0] = AllocateString(Arg0, Alloc);
+  Args[0] = Saver.save(Arg0).data();
   return std::error_code();
 }
 




More information about the llvm-commits mailing list