[llvm] r341988 - [Support] Avoid calling CommandLineToArgvW from shell32.dll
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 11 13:22:39 PDT 2018
Author: rnk
Date: Tue Sep 11 13:22:39 2018
New Revision: 341988
URL: http://llvm.org/viewvc/llvm-project?rev=341988&view=rev
Log:
[Support] Avoid calling CommandLineToArgvW from shell32.dll
Summary:
Shell32.dll depends on gdi32.dll and user32.dll, which are mostly DLLs
for Windows GUI functionality. LLVM's utilities don't typically need GUI
functionality, and loading these DLLs seems to be slowing down startup.
Also, we already have an implementation of Windows command line
tokenization in cl::TokenizeWindowsCommandLine, so we can just use it.
The goal is to get the original argv in UTF-8, so that it can pass
through most LLVM string APIs. A Windows process starts life with a
UTF-16 string for its command line, and it can be retrieved with
GetCommandLineW from kernel32.dll.
Previously, we would:
1. Get the wide command line
2. Call CommandLineToArgvW to handle quoting rules and separate it into
arguments.
3. For each wide argument, expand wildcards (* and ?) using
FindFirstFileW.
4. Convert each argument to UTF-8
Now we:
1. Get the wide command line, convert the whole thing to UTF-8
2. Tokenize the UTF-8 command line with cl::TokenizeWindowsCommandLine
3. For each argument, expand wildcards if present
- This requires converting back to UTF-16 to call FindFirstFileW
- Results of FindFirstFileW must be converted back to UTF-8
Reviewers: zturner
Subscribers: hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D51941
Modified:
llvm/trunk/lib/Support/Windows/Process.inc
Modified: llvm/trunk/lib/Support/Windows/Process.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Windows/Process.inc?rev=341988&r1=341987&r2=341988&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Windows/Process.inc (original)
+++ llvm/trunk/lib/Support/Windows/Process.inc Tue Sep 11 13:22:39 2018
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WindowsError.h"
#include <malloc.h>
@@ -140,73 +142,59 @@ Optional<std::string> Process::GetEnv(St
return std::string(Res.data());
}
-static const char *AllocateString(const SmallVectorImpl<char> &S,
- BumpPtrAllocator &Alloc) {
- char *Buf = reinterpret_cast<char *>(Alloc.Allocate(S.size() + 1, 1));
- ::memcpy(Buf, S.data(), S.size());
- Buf[S.size()] = '\0';
- return Buf;
-}
-
-/// Convert Arg from UTF-16 to UTF-8 and push it onto Args.
-static std::error_code ConvertAndPushArg(const wchar_t *Arg,
- SmallVectorImpl<const char *> &Args,
- BumpPtrAllocator &Alloc) {
- SmallVector<char, MAX_PATH> ArgString;
- if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), ArgString))
- return ec;
- Args.push_back(AllocateString(ArgString, Alloc));
- return std::error_code();
-}
-
-/// Perform wildcard expansion of Arg, or just push it into Args if it
-/// doesn't have wildcards or doesn't match any files.
-static std::error_code WildcardExpand(const wchar_t *Arg,
+/// Perform wildcard expansion of Arg, or just push it into Args if it doesn't
+/// have wildcards or doesn't match any files.
+static std::error_code WildcardExpand(StringRef Arg,
SmallVectorImpl<const char *> &Args,
- BumpPtrAllocator &Alloc) {
- if (!wcspbrk(Arg, L"*?")) {
- // Arg does not contain any wildcard characters. This is the common case.
- return ConvertAndPushArg(Arg, Args, Alloc);
- }
+ StringSaver &Saver) {
+ std::error_code EC;
- if (wcscmp(Arg, L"/?") == 0 || wcscmp(Arg, L"-?") == 0) {
- // Don't wildcard expand /?. Always treat it as an option.
- return ConvertAndPushArg(Arg, Args, Alloc);
+ // Don't expand Arg if it does not contain any wildcard characters. This is
+ // the common case. Also don't wildcard expand /?. Always treat it as an
+ // option.
+ if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" ||
+ Arg == "-?") {
+ Args.push_back(Arg.data());
+ return EC;
}
- // Extract any directory part of the argument.
- SmallVector<char, MAX_PATH> Dir;
- if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), Dir))
- return ec;
- sys::path::remove_filename(Dir);
- const int DirSize = Dir.size();
+ // Convert back to UTF-16 so we can call FindFirstFileW.
+ SmallVector<wchar_t, MAX_PATH> ArgW;
+ EC = windows::UTF8ToUTF16(Arg, ArgW);
+ if (EC)
+ return EC;
// Search for matching files.
// FIXME: This assumes the wildcard is only in the file name and not in the
// directory portion of the file path. For example, it doesn't handle
// "*\foo.c" nor "s?c\bar.cpp".
WIN32_FIND_DATAW FileData;
- HANDLE FindHandle = FindFirstFileW(Arg, &FileData);
+ HANDLE FindHandle = FindFirstFileW(ArgW.data(), &FileData);
if (FindHandle == INVALID_HANDLE_VALUE) {
- return ConvertAndPushArg(Arg, Args, Alloc);
+ Args.push_back(Arg.data());
+ return EC;
}
- std::error_code ec;
+ // Extract any directory part of the argument.
+ SmallString<MAX_PATH> Dir = Arg;
+ sys::path::remove_filename(Dir);
+ const int DirSize = Dir.size();
+
do {
- SmallVector<char, MAX_PATH> FileName;
- ec = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
+ SmallString<MAX_PATH> FileName;
+ EC = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
FileName);
- if (ec)
+ if (EC)
break;
// Append FileName to Dir, and remove it afterwards.
- llvm::sys::path::append(Dir, StringRef(FileName.data(), FileName.size()));
- Args.push_back(AllocateString(Dir, Alloc));
+ llvm::sys::path::append(Dir, FileName);
+ Args.push_back(Saver.save(StringRef(Dir)).data());
Dir.resize(DirSize);
} while (FindNextFileW(FindHandle, &FileData));
FindClose(FindHandle);
- return ec;
+ return EC;
}
static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) {
@@ -243,18 +231,20 @@ static std::error_code GetExecutableName
std::error_code
windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
BumpPtrAllocator &Alloc) {
- int ArgCount;
- std::unique_ptr<wchar_t *[], decltype(&LocalFree)> UnicodeCommandLine{
- CommandLineToArgvW(GetCommandLineW(), &ArgCount), &LocalFree};
- if (!UnicodeCommandLine)
- return mapWindowsError(::GetLastError());
-
+ const wchar_t *CmdW = GetCommandLineW();
+ assert(CmdW);
std::error_code EC;
+ SmallString<MAX_PATH> Cmd;
+ EC = windows::UTF16ToUTF8(CmdW, wcslen(CmdW), Cmd);
+ if (EC)
+ return EC;
- Args.reserve(ArgCount);
+ SmallVector<const char *, 20> TmpArgs;
+ StringSaver Saver(Alloc);
+ cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
- for (int I = 0; I < ArgCount; ++I) {
- EC = WildcardExpand(UnicodeCommandLine[I], Args, Alloc);
+ for (const char *Arg : TmpArgs) {
+ EC = WildcardExpand(Arg, Args, Saver);
if (EC)
return EC;
}
@@ -266,7 +256,7 @@ windows::GetCommandLineArguments(SmallVe
if (EC)
return EC;
sys::path::append(Arg0, Filename);
- Args[0] = AllocateString(Arg0, Alloc);
+ Args[0] = Saver.save(Arg0).data();
return std::error_code();
}
More information about the llvm-commits
mailing list