[llvm] a6a37a2 - [Support] On Windows, add optional support for {rpmalloc|snmalloc|mimalloc}
Alexandre Ganea via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 27 08:09:57 PDT 2020
Author: Alexandre Ganea
Date: 2020-08-27T11:09:46-04:00
New Revision: a6a37a2fcd2a8048a75bd0d8280497ed89d73224
URL: https://github.com/llvm/llvm-project/commit/a6a37a2fcd2a8048a75bd0d8280497ed89d73224
DIFF: https://github.com/llvm/llvm-project/commit/a6a37a2fcd2a8048a75bd0d8280497ed89d73224.diff
LOG: [Support] On Windows, add optional support for {rpmalloc|snmalloc|mimalloc}
This patch optionally replaces the CRT allocator (i.e., malloc and free) with rpmalloc (mixed public domain licence/MIT licence) or snmalloc (MIT licence) or mimalloc (MIT licence). Please note that the source code for these allocators must be available outside of LLVM's tree.
To enable, use `cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:/git/rpmalloc -DLLVM_USE_CRT_RELEASE=MT` where `D:/git/rpmalloc` has already been git clone'd from `https://github.com/mjansson/rpmalloc`. The same applies to snmalloc and mimalloc.
When enabled, the allocator will be embedded (statically linked) into the LLVM tools & libraries. This currently only works with the static CRT (/MT), although using the dynamic CRT (/MD) could potentially work as well in the future.
When enabled, this changes the memory stack from:
new/delete -> MS VC++ CRT malloc/free -> HeapAlloc -> VirtualAlloc
to:
new/delete -> {rpmalloc|snmalloc|mimalloc} -> VirtualAlloc
The goal of this patch is to bypass the application's global heap - which is thread-safe and therefore incurs locking - and instead take advantage of a modern lock-free, thread-caching allocator. On a 6-core Xeon Skylake we observe a 2.5x decrease in execution time when linking a large-scale application with LLD and ThinLTO (12 min 20 sec -> 5 min 34 sec), when all hardware threads are being used (using LLD's flag /opt:lldltojobs=all). On a dual 36-core Xeon Skylake with all hardware threads used, we observe a 24x decrease in execution time (1 h 2 min -> 2 min 38 sec) when linking a large application with LLD and ThinLTO. Clang build times also see a decrease in the range 5-10% depending on the configuration.
Differential Revision: https://reviews.llvm.org/D71786
Added:
Modified:
llvm/CMakeLists.txt
llvm/docs/CMake.rst
llvm/lib/Support/CMakeLists.txt
llvm/tools/llvm-shlib/CMakeLists.txt
llvm/tools/remarks-shlib/CMakeLists.txt
llvm/unittests/Support/DynamicLibrary/CMakeLists.txt
Removed:
################################################################################
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index cb36ff397be7..f6355a115642 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -567,6 +567,19 @@ option (LLVM_BUILD_EXTERNAL_COMPILER_RT
option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
"Show target and host info when tools are invoked with --version." ON)
+option(LLVM_INTEGRATED_CRT_ALLOC "Replace the Windows CRT allocator with any of {rpmalloc|mimalloc|snmalloc}. Only works with /MT enabled." OFF)
+if(LLVM_INTEGRATED_CRT_ALLOC)
+ if(NOT WIN32)
+ message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC is only supported on Windows.")
+ endif()
+ if(LLVM_USE_SANITIZER)
+ message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC cannot be used along with LLVM_USE_SANITIZER!")
+ endif()
+ if(CMAKE_BUILD_TYPE AND uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
+ message(FATAL_ERROR "The Debug target isn't supported along with LLVM_INTEGRATED_CRT_ALLOC!")
+ endif()
+endif()
+
# You can configure which libraries from LLVM you want to include in the
# shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
# list of LLVM components. All component names handled by llvm-config are valid.
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index c45f854c9849..cba6308b03ec 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -461,6 +461,23 @@ LLVM-specific variables
**LLVM_PARALLEL_LINK_JOBS**:STRING
Define the maximum number of concurrent link jobs.
+**LLVM_USE_CRT_{target}**:STRING
+ On Windows, tells which version of the C runtime library (CRT) should be used.
+ For example, -DLLVM_USE_CRT_RELEASE=MT would statically link the CRT into the
+ LLVM tools and library.
+
+**LLVM_INTEGRATED_CRT_ALLOC**:PATH
+ On Windows, allows embedding a different C runtime allocator into the LLVM
+ tools and libraries. Using a lock-free allocator such as the ones listed below
+ greatly decreases ThinLTO link time by about an order of magnitude. It also
+ mildly improves Clang build times, by about 5-10%. At the moment, rpmalloc,
+ snmalloc and mimalloc are supported. Use the path to `git clone` to select
+ the respective allocator, for example:
+ D:\git> git clone https://github.com/mjansson/rpmalloc
+ D:\llvm-project> cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:\git\rpmalloc
+ This flag needs to be used along with the static CRT, i.e. if building the
+ Release target, add -DLLVM_USE_CRT_RELEASE=MT.
+
**LLVM_BUILD_DOCS**:BOOL
Adds all *enabled* documentation targets (i.e. Doxgyen and Sphinx targets) as
dependencies of the default build targets. This results in all of the (enabled)
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 2da2f203a283..ed90c911c0b3 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -60,6 +60,34 @@ if(LLVM_WITH_Z3)
set(system_libs ${system_libs} ${Z3_LIBRARIES})
endif()
+# Override the C runtime allocator on Windows and embed it into LLVM tools & libraries
+if(LLVM_INTEGRATED_CRT_ALLOC)
+ if (CMAKE_BUILD_TYPE AND NOT ${LLVM_USE_CRT_${uppercase_CMAKE_BUILD_TYPE}} MATCHES "^(MT|MTd)$")
+ message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC only works with /MT or /MTd. Use LLVM_USE_CRT_${uppercase_CMAKE_BUILD_TYPE} to set the appropriate option.")
+ endif()
+
+ string(REGEX REPLACE "(/|\\\\)$" "" LLVM_INTEGRATED_CRT_ALLOC "${LLVM_INTEGRATED_CRT_ALLOC}")
+
+ if(NOT EXISTS "${LLVM_INTEGRATED_CRT_ALLOC}")
+ message(FATAL_ERROR "Cannot find the path to `git clone` for the CRT allocator! (${LLVM_INTEGRATED_CRT_ALLOC}). Currently, rpmalloc, snmalloc and mimalloc are supported.")
+ endif()
+
+ if(LLVM_INTEGRATED_CRT_ALLOC MATCHES "rpmalloc$")
+ add_definitions(-DENABLE_OVERRIDE -DENABLE_PRELOAD)
+ set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/rpmalloc/rpmalloc.c")
+ elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "snmalloc$")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17" PARENT_SCOPE)
+ set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/malloc.cc" "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/new.cc")
+ set(system_libs ${system_libs} "mincore.lib" "-INCLUDE:malloc")
+ elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "mimalloc$")
+ set(MIMALLOC_LIB "${LLVM_INTEGRATED_CRT_ALLOC}/out/msvc-x64/Release/mimalloc-static.lib")
+ if(NOT EXISTS "${MIMALLOC_LIB}")
+ message(FATAL_ERROR "Cannot find the mimalloc static library. To build it, first apply the patch from https://github.com/microsoft/mimalloc/issues/268 then build the Release x64 target through ${LLVM_INTEGRATED_CRT_ALLOC}\\ide\\vs2019\\mimalloc.sln")
+ endif()
+ set(system_libs ${system_libs} "${MIMALLOC_LIB}" "-INCLUDE:malloc")
+ endif()
+endif()
+
add_llvm_component_library(LLVMSupport
AArch64TargetParser.cpp
ABIBreak.cpp
@@ -181,6 +209,8 @@ add_llvm_component_library(LLVMSupport
xxhash.cpp
Z3Solver.cpp
+ ${ALLOCATOR_FILES}
+
# System
Atomic.cpp
DynamicLibrary.cpp
diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt
index f3a2056f80d3..b0ee19049e6f 100644
--- a/llvm/tools/llvm-shlib/CMakeLists.txt
+++ b/llvm/tools/llvm-shlib/CMakeLists.txt
@@ -176,4 +176,9 @@ if(LLVM_BUILD_LLVM_C_DYLIB AND MSVC)
# Finally link the target.
add_llvm_library(LLVM-C SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS intrinsics_gen)
+ if (LLVM_INTEGRATED_CRT_ALLOC AND MSVC)
+ # Make sure we search LLVMSupport first, before the CRT libs
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -INCLUDE:malloc")
+ endif()
+
endif()
diff --git a/llvm/tools/remarks-shlib/CMakeLists.txt b/llvm/tools/remarks-shlib/CMakeLists.txt
index e948496c603a..cb22434d3662 100644
--- a/llvm/tools/remarks-shlib/CMakeLists.txt
+++ b/llvm/tools/remarks-shlib/CMakeLists.txt
@@ -10,6 +10,11 @@ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Remarks.exports)
add_llvm_library(Remarks SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES})
+if (LLVM_INTEGRATED_CRT_ALLOC AND MSVC)
+ # Make sure we search LLVMSupport first, before the CRT libs
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -INCLUDE:malloc")
+endif()
+
install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/Remarks.h
DESTINATION include/llvm-c
COMPONENT Remarks)
diff --git a/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt b/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt
index 1ea9826a3d15..00d20510ffd3 100644
--- a/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt
+++ b/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt
@@ -38,6 +38,15 @@ function(dynlib_add_module NAME)
)
add_dependencies(DynamicLibraryTests ${NAME})
+
+ # We need to link in the Support lib for the Memory allocator override,
+ # otherwise the DynamicLibrary.Shutdown test will fail, because it would
+ # allocate memory with the CRT allocator, and release it with our custom
+ # allocator (see llvm/lib/Support/Windows/Memory.inc).
+ # /INCLUDE:malloc is there to force searching into LLVMSupport before libucrt
+ llvm_map_components_to_libnames(llvm_libs Support)
+ target_link_libraries(${NAME} ${llvm_libs} "-INCLUDE:malloc")
+
endfunction(dynlib_add_module)
# Revert -Wl,-z,nodelete on this test since it relies on the file
More information about the llvm-commits
mailing list