[llvm] a6a37a2 - [Support] On Windows, add optional support for {rpmalloc|snmalloc|mimalloc}

Alexandre Ganea via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 27 08:09:57 PDT 2020


Author: Alexandre Ganea
Date: 2020-08-27T11:09:46-04:00
New Revision: a6a37a2fcd2a8048a75bd0d8280497ed89d73224

URL: https://github.com/llvm/llvm-project/commit/a6a37a2fcd2a8048a75bd0d8280497ed89d73224
DIFF: https://github.com/llvm/llvm-project/commit/a6a37a2fcd2a8048a75bd0d8280497ed89d73224.diff

LOG: [Support] On Windows, add optional support for {rpmalloc|snmalloc|mimalloc}

This patch optionally replaces the CRT allocator (i.e., malloc and free) with rpmalloc (mixed public domain licence/MIT licence) or snmalloc (MIT licence) or mimalloc (MIT licence). Please note that the source code for these allocators must be available outside of LLVM's tree.

To enable, use `cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:/git/rpmalloc -DLLVM_USE_CRT_RELEASE=MT` where `D:/git/rpmalloc` has already been git clone'd from `https://github.com/mjansson/rpmalloc`. The same applies to snmalloc and mimalloc.

When enabled, the allocator will be embedded (statically linked) into the LLVM tools & libraries. This currently only works with the static CRT (/MT), although using the dynamic CRT (/MD) could potentially work as well in the future.

When enabled, this changes the memory stack from:
  new/delete -> MS VC++ CRT malloc/free -> HeapAlloc -> VirtualAlloc
to:
  new/delete -> {rpmalloc|snmalloc|mimalloc} -> VirtualAlloc

The goal of this patch is to bypass the application's global heap - which is thread-safe thus inducing locking - and instead take advantage of a modern lock-free, thread cache, allocator. On a 6-core Xeon Skylake we observe a 2.5x decrease in execution time when linking a large scale application with LLD and ThinLTO (12 min 20 sec -> 5 min 34 sec), when all hardware threads are being used (using LLD's flag /opt:lldltojobs=all). On a dual 36-core Xeon Skylake with all hardware threads used, we observe a 24x decrease in execution time (1 h 2 min -> 2 min 38 sec) when linking a large application with LLD and ThinLTO. Clang build times also see a decrease in the range 5-10% depending on the configuration.

Differential Revision: https://reviews.llvm.org/D71786

Added: 
    

Modified: 
    llvm/CMakeLists.txt
    llvm/docs/CMake.rst
    llvm/lib/Support/CMakeLists.txt
    llvm/tools/llvm-shlib/CMakeLists.txt
    llvm/tools/remarks-shlib/CMakeLists.txt
    llvm/unittests/Support/DynamicLibrary/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index cb36ff397be7..f6355a115642 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -567,6 +567,19 @@ option (LLVM_BUILD_EXTERNAL_COMPILER_RT
 option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
   "Show target and host info when tools are invoked with --version." ON)
 
+option(LLVM_INTEGRATED_CRT_ALLOC "Replace the Windows CRT allocator with any of {rpmalloc|mimalloc|snmalloc}. Only works with /MT enabled." OFF)
+if(LLVM_INTEGRATED_CRT_ALLOC)
+  if(NOT WIN32)
+    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC is only supported on Windows.")
+  endif()
+  if(LLVM_USE_SANITIZER)
+    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC cannot be used along with LLVM_USE_SANITIZER!")
+  endif()
+  if(CMAKE_BUILD_TYPE AND uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
+    message(FATAL_ERROR "The Debug target isn't supported along with LLVM_INTEGRATED_CRT_ALLOC!")
+  endif()
+endif()
+
 # You can configure which libraries from LLVM you want to include in the
 # shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
 # list of LLVM components. All component names handled by llvm-config are valid.

diff  --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index c45f854c9849..cba6308b03ec 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -461,6 +461,23 @@ LLVM-specific variables
 **LLVM_PARALLEL_LINK_JOBS**:STRING
   Define the maximum number of concurrent link jobs.
 
+**LLVM_USE_CRT_{target}**:STRING
+  On Windows, tells which version of the C runtime library (CRT) should be used.
+  For example, -DLLVM_USE_CRT_RELEASE=MT would statically link the CRT into the
+  LLVM tools and library.
+
+**LLVM_INTEGRATED_CRT_ALLOC**:PATH
+  On Windows, allows embedding a different C runtime allocator into the LLVM
+  tools and libraries. Using a lock-free allocator such as the ones listed below
+  greatly decreases ThinLTO link time by about an order of magnitude. It also
+  mildly improves Clang build times, by about 5-10%. At the moment, rpmalloc,
+  snmalloc and mimalloc are supported. Use the path to `git clone` to select
+  the respective allocator, for example:
+    D:\git> git clone https://github.com/mjansson/rpmalloc
+    D:\llvm-project> cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:\git\rpmalloc
+  This flag needs to be used along with the static CRT, i.e. if building the
+  Release target, add -DLLVM_USE_CRT_RELEASE=MT.
+
 **LLVM_BUILD_DOCS**:BOOL
   Adds all *enabled* documentation targets (i.e. Doxgyen and Sphinx targets) as
   dependencies of the default build targets.  This results in all of the (enabled)

diff  --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 2da2f203a283..ed90c911c0b3 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -60,6 +60,34 @@ if(LLVM_WITH_Z3)
   set(system_libs ${system_libs} ${Z3_LIBRARIES})
 endif()
 
+# Override the C runtime allocator on Windows and embed it into LLVM tools & libraries
+if(LLVM_INTEGRATED_CRT_ALLOC)
+  if (CMAKE_BUILD_TYPE AND NOT ${LLVM_USE_CRT_${uppercase_CMAKE_BUILD_TYPE}} MATCHES "^(MT|MTd)$")
+    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC only works with /MT or /MTd. Use LLVM_USE_CRT_${uppercase_CMAKE_BUILD_TYPE} to set the appropriate option.")
+  endif()
+
+  string(REGEX REPLACE "(/|\\\\)$" "" LLVM_INTEGRATED_CRT_ALLOC "${LLVM_INTEGRATED_CRT_ALLOC}")
+
+  if(NOT EXISTS "${LLVM_INTEGRATED_CRT_ALLOC}")
+    message(FATAL_ERROR "Cannot find the path to `git clone` for the CRT allocator! (${LLVM_INTEGRATED_CRT_ALLOC}). Currently, rpmalloc, snmalloc and mimalloc are supported.")
+  endif()
+
+  if(LLVM_INTEGRATED_CRT_ALLOC MATCHES "rpmalloc$")
+    add_definitions(-DENABLE_OVERRIDE -DENABLE_PRELOAD)
+    set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/rpmalloc/rpmalloc.c")
+  elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "snmalloc$")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17" PARENT_SCOPE)
+    set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/malloc.cc" "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/new.cc")
+    set(system_libs ${system_libs} "mincore.lib" "-INCLUDE:malloc")
+  elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "mimalloc$")
+    set(MIMALLOC_LIB "${LLVM_INTEGRATED_CRT_ALLOC}/out/msvc-x64/Release/mimalloc-static.lib")
+    if(NOT EXISTS "${MIMALLOC_LIB}")
+	  message(FATAL_ERROR "Cannot find the mimalloc static library. To build it, first apply the patch from https://github.com/microsoft/mimalloc/issues/268 then build the Release x64 target through ${LLVM_INTEGRATED_CRT_ALLOC}\\ide\\vs2019\\mimalloc.sln")
+    endif()
+    set(system_libs ${system_libs} "${MIMALLOC_LIB}" "-INCLUDE:malloc")
+  endif()
+endif()
+
 add_llvm_component_library(LLVMSupport
   AArch64TargetParser.cpp
   ABIBreak.cpp
@@ -181,6 +209,8 @@ add_llvm_component_library(LLVMSupport
   xxhash.cpp
   Z3Solver.cpp
 
+  ${ALLOCATOR_FILES}
+
 # System
   Atomic.cpp
   DynamicLibrary.cpp

diff  --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt
index f3a2056f80d3..b0ee19049e6f 100644
--- a/llvm/tools/llvm-shlib/CMakeLists.txt
+++ b/llvm/tools/llvm-shlib/CMakeLists.txt
@@ -176,4 +176,9 @@ if(LLVM_BUILD_LLVM_C_DYLIB AND MSVC)
   # Finally link the target.
   add_llvm_library(LLVM-C SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS intrinsics_gen)
 
+  if (LLVM_INTEGRATED_CRT_ALLOC AND MSVC)
+    # Make sure we search LLVMSupport first, before the CRT libs
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -INCLUDE:malloc")
+  endif()
+  
 endif()

diff  --git a/llvm/tools/remarks-shlib/CMakeLists.txt b/llvm/tools/remarks-shlib/CMakeLists.txt
index e948496c603a..cb22434d3662 100644
--- a/llvm/tools/remarks-shlib/CMakeLists.txt
+++ b/llvm/tools/remarks-shlib/CMakeLists.txt
@@ -10,6 +10,11 @@ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Remarks.exports)
 
 add_llvm_library(Remarks SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES})
 
+if (LLVM_INTEGRATED_CRT_ALLOC AND MSVC)
+  # Make sure we search LLVMSupport first, before the CRT libs
+  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -INCLUDE:malloc")
+endif()
+  
 install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/Remarks.h
   DESTINATION include/llvm-c
   COMPONENT Remarks)

diff  --git a/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt b/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt
index 1ea9826a3d15..00d20510ffd3 100644
--- a/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt
+++ b/llvm/unittests/Support/DynamicLibrary/CMakeLists.txt
@@ -38,6 +38,15 @@ function(dynlib_add_module NAME)
     )
 
   add_dependencies(DynamicLibraryTests ${NAME})
+  
+  # We need to link in the Support lib for the Memory allocator override,
+  # otherwise the DynamicLibrary.Shutdown test will fail, because it would
+  # allocate memory with the CRT allocator, and release it with our custom
+  # allocator (see llvm/lib/Support/Windows/Memory.inc).
+  # /INCLUDE:malloc is there to force searching into LLVMSupport before libucrt
+  llvm_map_components_to_libnames(llvm_libs Support)
+  target_link_libraries(${NAME} ${llvm_libs} "-INCLUDE:malloc")
+  
 endfunction(dynlib_add_module)
 
 # Revert -Wl,-z,nodelete on this test since it relies on the file


        


More information about the llvm-commits mailing list