[compiler-rt] 3d4bba3 - [MemProf] Memory profiling runtime support

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 16 09:47:15 PDT 2020


Author: Teresa Johnson
Date: 2020-10-16T09:47:02-07:00
New Revision: 3d4bba302d2460b9ac6463ef920c301f1f40fb41

URL: https://github.com/llvm/llvm-project/commit/3d4bba302d2460b9ac6463ef920c301f1f40fb41
DIFF: https://github.com/llvm/llvm-project/commit/3d4bba302d2460b9ac6463ef920c301f1f40fb41.diff

LOG: [MemProf] Memory profiling runtime support

See RFC for background:
http://lists.llvm.org/pipermail/llvm-dev/2020-June/142744.html

Follow-on companion to the clang/llvm instrumentation support in D85948,
which was committed earlier.

This patch adds the compiler-rt runtime support for the memory
profiling.

Note that much of this support was cloned from asan (and then greatly
simplified and renamed). For example, the interactions with the
sanitizer_common allocators, error handling, interception, etc.

The bulk of the memory profiling specific code can be found in the
MemInfoBlock, MemInfoBlockCache, and related classes defined and used
in memprof_allocator.cpp.

For now, the memory profile is dumped to text (stderr by default, but
honors the sanitizer_common log_path flag). It is dumped in either a
default verbose format, or an optional terse format.

This patch also adds a set of tests for the core functionality.

Differential Revision: https://reviews.llvm.org/D87120

Added: 
    compiler-rt/include/sanitizer/memprof_interface.h
    compiler-rt/lib/memprof/CMakeLists.txt
    compiler-rt/lib/memprof/README.txt
    compiler-rt/lib/memprof/memprof.syms.extra
    compiler-rt/lib/memprof/memprof_allocator.cpp
    compiler-rt/lib/memprof/memprof_allocator.h
    compiler-rt/lib/memprof/memprof_descriptions.cpp
    compiler-rt/lib/memprof/memprof_descriptions.h
    compiler-rt/lib/memprof/memprof_flags.cpp
    compiler-rt/lib/memprof/memprof_flags.h
    compiler-rt/lib/memprof/memprof_flags.inc
    compiler-rt/lib/memprof/memprof_init_version.h
    compiler-rt/lib/memprof/memprof_interceptors.cpp
    compiler-rt/lib/memprof/memprof_interceptors.h
    compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp
    compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h
    compiler-rt/lib/memprof/memprof_interface_internal.h
    compiler-rt/lib/memprof/memprof_internal.h
    compiler-rt/lib/memprof/memprof_linux.cpp
    compiler-rt/lib/memprof/memprof_malloc_linux.cpp
    compiler-rt/lib/memprof/memprof_mapping.h
    compiler-rt/lib/memprof/memprof_new_delete.cpp
    compiler-rt/lib/memprof/memprof_posix.cpp
    compiler-rt/lib/memprof/memprof_preinit.cpp
    compiler-rt/lib/memprof/memprof_rtl.cpp
    compiler-rt/lib/memprof/memprof_shadow_setup.cpp
    compiler-rt/lib/memprof/memprof_stack.cpp
    compiler-rt/lib/memprof/memprof_stack.h
    compiler-rt/lib/memprof/memprof_stats.cpp
    compiler-rt/lib/memprof/memprof_stats.h
    compiler-rt/lib/memprof/memprof_thread.cpp
    compiler-rt/lib/memprof/memprof_thread.h
    compiler-rt/lib/memprof/weak_symbols.txt
    compiler-rt/test/memprof/CMakeLists.txt
    compiler-rt/test/memprof/TestCases/atexit_stats.cpp
    compiler-rt/test/memprof/TestCases/default_options.cpp
    compiler-rt/test/memprof/TestCases/dump_process_map.cpp
    compiler-rt/test/memprof/TestCases/free_hook_realloc.cpp
    compiler-rt/test/memprof/TestCases/interface_test.cpp
    compiler-rt/test/memprof/TestCases/log_path_test.cpp
    compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp
    compiler-rt/test/memprof/TestCases/malloc_hook.cpp
    compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
    compiler-rt/test/memprof/TestCases/memprof_options-help.cpp
    compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
    compiler-rt/test/memprof/TestCases/realloc.cpp
    compiler-rt/test/memprof/TestCases/stress_dtls.c
    compiler-rt/test/memprof/TestCases/test_malloc_load_store.c
    compiler-rt/test/memprof/TestCases/test_memintrin.cpp
    compiler-rt/test/memprof/TestCases/test_new_load_store.cpp
    compiler-rt/test/memprof/TestCases/test_terse.cpp
    compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp
    compiler-rt/test/memprof/lit.cfg.py
    compiler-rt/test/memprof/lit.site.cfg.py.in

Modified: 
    compiler-rt/CMakeLists.txt
    compiler-rt/cmake/config-ix.cmake
    compiler-rt/include/CMakeLists.txt
    compiler-rt/lib/CMakeLists.txt
    compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.cpp
    compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.h
    compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
    compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
    compiler-rt/test/CMakeLists.txt
    compiler-rt/test/lit.common.cfg.py
    compiler-rt/test/lit.common.configured.in

Removed: 
    


################################################################################
diff  --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 45c52f804f48..82ed440bc013 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -45,6 +45,8 @@ option(COMPILER_RT_BUILD_LIBFUZZER "Build libFuzzer" ON)
 mark_as_advanced(COMPILER_RT_BUILD_LIBFUZZER)
 option(COMPILER_RT_BUILD_PROFILE "Build profile runtime" ON)
 mark_as_advanced(COMPILER_RT_BUILD_PROFILE)
+option(COMPILER_RT_BUILD_MEMPROF "Build memory profiling runtime" ON)
+mark_as_advanced(COMPILER_RT_BUILD_MEMPROF)
 option(COMPILER_RT_BUILD_XRAY_NO_PREINIT "Build xray with no preinit patching" OFF)
 mark_as_advanced(COMPILER_RT_BUILD_XRAY_NO_PREINIT)
 

diff  --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 12c2996210c4..dd49544a476e 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -324,6 +324,7 @@ else()
 endif()
 set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X})
 set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64})
+set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64})
 set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64}
     ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9})
 set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
@@ -551,6 +552,9 @@ if(APPLE)
   list_intersect(HWASAN_SUPPORTED_ARCH
     ALL_HWASAN_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(MEMPROF_SUPPORTED_ARCH
+    ALL_MEMPROF_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
   list_intersect(PROFILE_SUPPORTED_ARCH
     ALL_PROFILE_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
@@ -599,6 +603,7 @@ else()
   filter_available_targets(LSAN_SUPPORTED_ARCH ${ALL_LSAN_SUPPORTED_ARCH})
   filter_available_targets(MSAN_SUPPORTED_ARCH ${ALL_MSAN_SUPPORTED_ARCH})
   filter_available_targets(HWASAN_SUPPORTED_ARCH ${ALL_HWASAN_SUPPORTED_ARCH})
+  filter_available_targets(MEMPROF_SUPPORTED_ARCH ${ALL_MEMPROF_SUPPORTED_ARCH})
   filter_available_targets(PROFILE_SUPPORTED_ARCH ${ALL_PROFILE_SUPPORTED_ARCH})
   filter_available_targets(TSAN_SUPPORTED_ARCH ${ALL_TSAN_SUPPORTED_ARCH})
   filter_available_targets(UBSAN_SUPPORTED_ARCH ${ALL_UBSAN_SUPPORTED_ARCH})
@@ -702,6 +707,13 @@ else()
   set(COMPILER_RT_HAS_HWASAN FALSE)
 endif()
 
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND MEMPROF_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Linux")
+  set(COMPILER_RT_HAS_MEMPROF TRUE)
+else()
+  set(COMPILER_RT_HAS_MEMPROF FALSE)
+endif()
+
 if (PROFILE_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND
     OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android|Fuchsia|SunOS|NetBSD")
   set(COMPILER_RT_HAS_PROFILE TRUE)

diff  --git a/compiler-rt/include/CMakeLists.txt b/compiler-rt/include/CMakeLists.txt
index d47d7baeb118..de4cf3655610 100644
--- a/compiler-rt/include/CMakeLists.txt
+++ b/compiler-rt/include/CMakeLists.txt
@@ -5,6 +5,7 @@ if (COMPILER_RT_BUILD_SANITIZERS)
     sanitizer/common_interface_defs.h
     sanitizer/coverage_interface.h
     sanitizer/dfsan_interface.h
+    sanitizer/memprof_interface.h
     sanitizer/hwasan_interface.h
     sanitizer/linux_syscall_hooks.h
     sanitizer/lsan_interface.h

diff  --git a/compiler-rt/include/sanitizer/memprof_interface.h b/compiler-rt/include/sanitizer/memprof_interface.h
new file mode 100644
index 000000000000..a72126051003
--- /dev/null
+++ b/compiler-rt/include/sanitizer/memprof_interface.h
@@ -0,0 +1,60 @@
+//===-- sanitizer/memprof_interface.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler (MemProf).
+//
+// Public interface header.
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_MEMPROF_INTERFACE_H
+#define SANITIZER_MEMPROF_INTERFACE_H
+
+#include <sanitizer/common_interface_defs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/// Records access to a memory region (<c>[addr, addr+size)</c>).
+///
+/// This memory must be previously allocated by your program.
+///
+/// \param addr Start of memory region.
+/// \param size Size of memory region.
+void __memprof_record_access_range(void const volatile *addr, size_t size);
+
+/// Records access to a memory address <c><i>addr</i></c>.
+///
+/// This memory must be previously allocated by your program.
+///
+/// \param addr Accessed memory address
+void __memprof_record_access(void const volatile *addr);
+
+/// User-provided callback on MemProf errors.
+///
+/// You can provide a function that would be called immediately when MemProf
+/// detects an error. This is useful in cases when MemProf detects an error but
+/// your program crashes before the MemProf report is printed.
+void __memprof_on_error(void);
+
+/// Prints accumulated statistics to <c>stderr</c> (useful for calling from the
+/// debugger).
+void __memprof_print_accumulated_stats(void);
+
+/// User-provided default option settings.
+///
+/// You can provide your own implementation of this function to return a string
+/// containing MemProf runtime options (for example,
+/// <c>verbosity=1:print_stats=1</c>).
+///
+/// \returns Default options string.
+const char *__memprof_default_options(void);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // SANITIZER_MEMPROF_INTERFACE_H

diff  --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt
index 2020ee32d4f7..69c6c6c814d1 100644
--- a/compiler-rt/lib/CMakeLists.txt
+++ b/compiler-rt/lib/CMakeLists.txt
@@ -60,6 +60,10 @@ if(COMPILER_RT_BUILD_LIBFUZZER)
   compiler_rt_build_runtime(fuzzer)
 endif()
 
+if(COMPILER_RT_BUILD_MEMPROF AND COMPILER_RT_HAS_SANITIZER_COMMON)
+  compiler_rt_build_runtime(memprof)
+endif()
+
 # It doesn't normally make sense to build runtimes when a sanitizer is enabled,
 # so we don't add_subdirectory the runtimes in that case. However, the opposite
 # is true for fuzzers that exercise parts of the runtime. So we add the fuzzer

diff  --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt
new file mode 100644
index 000000000000..a990ef3304d5
--- /dev/null
+++ b/compiler-rt/lib/memprof/CMakeLists.txt
@@ -0,0 +1,195 @@
+# Build for the Memory Profiler runtime support library.
+
+set(MEMPROF_SOURCES
+  memprof_allocator.cpp
+  memprof_descriptions.cpp
+  memprof_flags.cpp
+  memprof_interceptors.cpp
+  memprof_interceptors_memintrinsics.cpp
+  memprof_linux.cpp
+  memprof_malloc_linux.cpp
+  memprof_posix.cpp
+  memprof_rtl.cpp
+  memprof_shadow_setup.cpp
+  memprof_stack.cpp
+  memprof_stats.cpp
+  memprof_thread.cpp
+  )
+
+set(MEMPROF_CXX_SOURCES
+  memprof_new_delete.cpp
+  )
+
+set(MEMPROF_PREINIT_SOURCES
+  memprof_preinit.cpp
+  )
+
+SET(MEMPROF_HEADERS
+  memprof_allocator.h
+  memprof_descriptions.h
+  memprof_flags.h
+  memprof_flags.inc
+  memprof_init_version.h
+  memprof_interceptors.h
+  memprof_interceptors_memintrinsics.h
+  memprof_interface_internal.h
+  memprof_internal.h
+  memprof_mapping.h
+  memprof_stack.h
+  memprof_stats.h
+  memprof_thread.h
+  )
+
+include_directories(..)
+
+set(MEMPROF_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+set(MEMPROF_COMMON_DEFINITIONS "")
+
+append_rtti_flag(OFF MEMPROF_CFLAGS)
+
+set(MEMPROF_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS})
+
+set(MEMPROF_DYNAMIC_DEFINITIONS
+  ${MEMPROF_COMMON_DEFINITIONS} MEMPROF_DYNAMIC=1)
+
+set(MEMPROF_DYNAMIC_CFLAGS ${MEMPROF_CFLAGS})
+append_list_if(COMPILER_RT_HAS_FTLS_MODEL_INITIAL_EXEC
+  -ftls-model=initial-exec MEMPROF_DYNAMIC_CFLAGS)
+
+set(MEMPROF_DYNAMIC_LIBS ${SANITIZER_CXX_ABI_LIBRARIES} ${SANITIZER_COMMON_LINK_LIBS})
+
+append_list_if(COMPILER_RT_HAS_LIBDL dl MEMPROF_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBRT rt MEMPROF_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBM m MEMPROF_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread MEMPROF_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBLOG log MEMPROF_DYNAMIC_LIBS)
+
+if (TARGET cxx-headers OR HAVE_LIBCXX)
+  set(MEMPROF_DEPS cxx-headers)
+endif()
+
+# Compile MemProf sources into an object library.
+
+add_compiler_rt_object_libraries(RTMemprof_dynamic
+  OS ${SANITIZER_COMMON_SUPPORTED_OS}
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  SOURCES ${MEMPROF_SOURCES} ${MEMPROF_CXX_SOURCES}
+  ADDITIONAL_HEADERS ${MEMPROF_HEADERS}
+  CFLAGS ${MEMPROF_DYNAMIC_CFLAGS}
+  DEFS ${MEMPROF_DYNAMIC_DEFINITIONS}
+  DEPS ${MEMPROF_DEPS})
+
+add_compiler_rt_object_libraries(RTMemprof
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  SOURCES ${MEMPROF_SOURCES}
+  ADDITIONAL_HEADERS ${MEMPROF_HEADERS}
+  CFLAGS ${MEMPROF_CFLAGS}
+  DEFS ${MEMPROF_COMMON_DEFINITIONS}
+  DEPS ${MEMPROF_DEPS})
+add_compiler_rt_object_libraries(RTMemprof_cxx
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  SOURCES ${MEMPROF_CXX_SOURCES}
+  ADDITIONAL_HEADERS ${MEMPROF_HEADERS}
+  CFLAGS ${MEMPROF_CFLAGS}
+  DEFS ${MEMPROF_COMMON_DEFINITIONS}
+  DEPS ${MEMPROF_DEPS})
+add_compiler_rt_object_libraries(RTMemprof_preinit
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  SOURCES ${MEMPROF_PREINIT_SOURCES}
+  ADDITIONAL_HEADERS ${MEMPROF_HEADERS}
+  CFLAGS ${MEMPROF_CFLAGS}
+  DEFS ${MEMPROF_COMMON_DEFINITIONS}
+  DEPS ${MEMPROF_DEPS})
+
+file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "")
+add_compiler_rt_object_libraries(RTMemprof_dynamic_version_script_dummy
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp
+  CFLAGS ${MEMPROF_DYNAMIC_CFLAGS}
+  DEFS ${MEMPROF_DYNAMIC_DEFINITIONS}
+  DEPS ${MEMPROF_DEPS})
+
+# Build MemProf runtimes shipped with Clang.
+add_compiler_rt_component(memprof)
+
+# Build separate libraries for each target.
+
+set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS
+  RTInterception
+  RTSanitizerCommon
+  RTSanitizerCommonLibc
+  RTSanitizerCommonCoverage
+  RTSanitizerCommonSymbolizer)
+
+add_compiler_rt_runtime(clang_rt.memprof
+  STATIC
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  OBJECT_LIBS RTMemprof_preinit
+              RTMemprof
+              ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS}
+  CFLAGS ${MEMPROF_CFLAGS}
+  DEFS ${MEMPROF_COMMON_DEFINITIONS}
+  PARENT_TARGET memprof)
+
+add_compiler_rt_runtime(clang_rt.memprof_cxx
+  STATIC
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  OBJECT_LIBS RTMemprof_cxx
+  CFLAGS ${MEMPROF_CFLAGS}
+  DEFS ${MEMPROF_COMMON_DEFINITIONS}
+  PARENT_TARGET memprof)
+
+add_compiler_rt_runtime(clang_rt.memprof-preinit
+  STATIC
+  ARCHS ${MEMPROF_SUPPORTED_ARCH}
+  OBJECT_LIBS RTMemprof_preinit
+  CFLAGS ${MEMPROF_CFLAGS}
+  DEFS ${MEMPROF_COMMON_DEFINITIONS}
+  PARENT_TARGET memprof)
+
+foreach(arch ${MEMPROF_SUPPORTED_ARCH})
+  if (UNIX)
+    add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch}
+                                  LIBS clang_rt.memprof-${arch} clang_rt.memprof_cxx-${arch}
+                                  EXTRA memprof.syms.extra)
+    set(VERSION_SCRIPT_FLAG
+         -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers)
+    set_property(SOURCE
+      ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp
+      APPEND PROPERTY
+      OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers)
+  else()
+    set(VERSION_SCRIPT_FLAG)
+  endif()
+
+  set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION)
+
+  add_compiler_rt_runtime(clang_rt.memprof
+    SHARED
+    ARCHS ${arch}
+    OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS}
+            RTMemprof_dynamic
+            # The only purpose of RTMemprof_dynamic_version_script_dummy is to
+            # carry a dependency of the shared runtime on the version script.
+            # Replacing it with a straightforward
+            # add_dependencies(clang_rt.memprof-dynamic-${arch} clang_rt.memprof-dynamic-${arch}-version-list)
+            # generates an order-only dependency in ninja.
+            RTMemprof_dynamic_version_script_dummy
+            ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION}
+    CFLAGS ${MEMPROF_DYNAMIC_CFLAGS}
+    LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS}
+              ${VERSION_SCRIPT_FLAG}
+    LINK_LIBS ${MEMPROF_DYNAMIC_LIBS}
+    DEFS ${MEMPROF_DYNAMIC_DEFINITIONS}
+    PARENT_TARGET memprof)
+
+  if (SANITIZER_USE_SYMBOLS)
+    add_sanitizer_rt_symbols(clang_rt.memprof_cxx
+      ARCHS ${arch})
+    add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols)
+    add_sanitizer_rt_symbols(clang_rt.memprof
+      ARCHS ${arch}
+      EXTRA memprof.syms.extra)
+    add_dependencies(memprof clang_rt.memprof-${arch}-symbols)
+  endif()
+endforeach()

diff  --git a/compiler-rt/lib/memprof/README.txt b/compiler-rt/lib/memprof/README.txt
new file mode 100644
index 000000000000..82012c5e71b0
--- /dev/null
+++ b/compiler-rt/lib/memprof/README.txt
@@ -0,0 +1,17 @@
+MemProfiling RT
+================================
+This directory contains sources of the MemProfiling (MemProf) runtime library.
+
+Directory structure:
+README.txt       : This file.
+CMakeLists.txt   : File for cmake-based build.
+memprof_*.{cc,h}    : Sources of the memprof runtime library.
+
+Also MemProf runtime needs the following libraries:
+lib/interception/      : Machinery used to intercept function calls.
+lib/sanitizer_common/  : Code shared between various sanitizers.
+
+MemProf runtime can only be built by CMake. You can run MemProf tests
+from the root of your CMake build tree:
+
+make check-memprof

diff  --git a/compiler-rt/lib/memprof/memprof.syms.extra b/compiler-rt/lib/memprof/memprof.syms.extra
new file mode 100644
index 000000000000..173280ffe97f
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof.syms.extra
@@ -0,0 +1 @@
+__memprof_*

diff  --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
new file mode 100644
index 000000000000..f13fde02c135
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -0,0 +1,898 @@
+//===-- memprof_allocator.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Implementation of MemProf's memory allocator, which uses the allocator
+// from sanitizer_common.
+//
+//===----------------------------------------------------------------------===//
+
+#include "memprof_allocator.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_errno.h"
+#include "sanitizer_common/sanitizer_file.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_list.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
+#include <ctime>
+#include <sched.h>
+#include <stdlib.h>
+
+namespace __memprof {
+
+static int GetCpuId(void) {
+  // _memprof_preinit is called via the preinit_array, which subsequently calls
+  // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
+  // will seg fault as the address of __vdso_getcpu will be null.
+  if (!memprof_init_done)
+    return -1;
+  return sched_getcpu();
+}
+
+// Compute the timestamp in ms.
+static int GetTimestamp(void) {
+  // timespec_get will segfault if called from dl_init
+  if (!memprof_timestamp_inited) {
+    // By returning 0, this will be effectively treated as being
+    // timestamped at memprof init time (when memprof_init_timestamp_s
+    // is initialized).
+    return 0;
+  }
+  timespec ts;
+  timespec_get(&ts, TIME_UTC);
+  return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
+}
+
+static MemprofAllocator &get_allocator();
+
+// The memory chunk allocated from the underlying allocator looks like this:
+// H H U U U U U U
+//   H -- ChunkHeader (32 bytes)
+//   U -- user memory.
+
+// If there is left padding before the ChunkHeader (due to use of memalign),
+// we store a magic value in the first uptr word of the memory block and
+// store the address of ChunkHeader in the next uptr.
+// M B L L L L L L L L L  H H U U U U U U
+//   |                    ^
+//   ---------------------|
+//   M -- magic value kAllocBegMagic
+//   B -- address of ChunkHeader pointing to the first 'H'
+
+constexpr uptr kMaxAllowedMallocBits = 40;
+
+// Should be no more than 32-bytes
+struct ChunkHeader {
+  // 1-st 4 bytes.
+  u32 alloc_context_id;
+  // 2-nd 4 bytes
+  u32 cpu_id;
+  // 3-rd 4 bytes
+  u32 timestamp_ms;
+  // 4-th 4 bytes
+  // Note only 1 bit is needed for this flag if we need space in the future for
+  // more fields.
+  u32 from_memalign;
+  // 5-th and 6-th 4 bytes
+  // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
+  // could be shrunk to kMaxAllowedMallocBits if we need space in the future for
+  // more fields.
+  atomic_uint64_t user_requested_size;
+  // 23 bits available
+  // 7-th and 8-th 4 bytes
+  u64 data_type_id; // TODO: hash of type name
+};
+
+static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
+COMPILER_CHECK(kChunkHeaderSize == 32);
+
+struct MemprofChunk : ChunkHeader {
+  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
+  uptr UsedSize() {
+    return atomic_load(&user_requested_size, memory_order_relaxed);
+  }
+  void *AllocBeg() {
+    if (from_memalign)
+      return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
+    return reinterpret_cast<void *>(this);
+  }
+};
+
+class LargeChunkHeader {
+  static constexpr uptr kAllocBegMagic =
+      FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
+  atomic_uintptr_t magic;
+  MemprofChunk *chunk_header;
+
+public:
+  MemprofChunk *Get() const {
+    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
+               ? chunk_header
+               : nullptr;
+  }
+
+  void Set(MemprofChunk *p) {
+    if (p) {
+      chunk_header = p;
+      atomic_store(&magic, kAllocBegMagic, memory_order_release);
+      return;
+    }
+
+    uptr old = kAllocBegMagic;
+    if (!atomic_compare_exchange_strong(&magic, &old, 0,
+                                        memory_order_release)) {
+      CHECK_EQ(old, kAllocBegMagic);
+    }
+  }
+};
+
+void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
+  // Since memprof's mapping is compacting, the shadow chunk may be
+  // not page-aligned, so we only flush the page-aligned portion.
+  ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
+}
+
+void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
+  // Statistics.
+  MemprofStats &thread_stats = GetCurrentThreadStats();
+  thread_stats.mmaps++;
+  thread_stats.mmaped += size;
+}
+void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
+  // We are about to unmap a chunk of user memory.
+  // Mark the corresponding shadow memory as not needed.
+  FlushUnneededMemProfShadowMemory(p, size);
+  // Statistics.
+  MemprofStats &thread_stats = GetCurrentThreadStats();
+  thread_stats.munmaps++;
+  thread_stats.munmaped += size;
+}
+
+AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
+  CHECK(ms);
+  return &ms->allocator_cache;
+}
+
+struct MemInfoBlock {
+  u32 alloc_count;
+  u64 total_access_count, min_access_count, max_access_count;
+  u64 total_size;
+  u32 min_size, max_size;
+  u32 alloc_timestamp, dealloc_timestamp;
+  u64 total_lifetime;
+  u32 min_lifetime, max_lifetime;
+  u32 alloc_cpu_id, dealloc_cpu_id;
+  u32 num_migrated_cpu;
+
+  // Only compared to prior deallocated object currently.
+  u32 num_lifetime_overlaps;
+  u32 num_same_alloc_cpu;
+  u32 num_same_dealloc_cpu;
+
+  u64 data_type_id; // TODO: hash of type name
+
+  MemInfoBlock() : alloc_count(0) {}
+
+  MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp,
+               u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu)
+      : alloc_count(1), total_access_count(access_count),
+        min_access_count(access_count), max_access_count(access_count),
+        total_size(size), min_size(size), max_size(size),
+        alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
+        total_lifetime(dealloc_timestamp - alloc_timestamp),
+        min_lifetime(total_lifetime), max_lifetime(total_lifetime),
+        alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
+        num_lifetime_overlaps(0), num_same_alloc_cpu(0),
+        num_same_dealloc_cpu(0) {
+    num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
+  }
+
+  void Print(u64 id) {
+    u64 p;
+    if (flags()->print_terse) {
+      p = total_size * 100 / alloc_count;
+      Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100,
+             min_size, max_size);
+      p = total_access_count * 100 / alloc_count;
+      Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count,
+             max_access_count);
+      p = total_lifetime * 100 / alloc_count;
+      Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime);
+      Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps,
+             num_same_alloc_cpu, num_same_dealloc_cpu);
+    } else {
+      p = total_size * 100 / alloc_count;
+      Printf("Memory allocation stack id = %llu\n", id);
+      Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n",
+             alloc_count, p / 100, p % 100, min_size, max_size);
+      p = total_access_count * 100 / alloc_count;
+      Printf("\taccess_count (ave/min/max): %d.%02d / %u / %u\n", p / 100,
+             p % 100, min_access_count, max_access_count);
+      p = total_lifetime * 100 / alloc_count;
+      Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100,
+             min_lifetime, max_lifetime);
+      Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
+             "cpu: %u, num same dealloc_cpu: %u\n",
+             num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu,
+             num_same_dealloc_cpu);
+    }
+  }
+
+  static void printHeader() {
+    CHECK(flags()->print_terse);
+    Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/"
+           "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/"
+           "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/"
+           "NumSameDeallocCpu\n");
+  }
+
+  void Merge(MemInfoBlock &newMIB) {
+    alloc_count += newMIB.alloc_count;
+
+    total_access_count += newMIB.total_access_count;
+    min_access_count = Min(min_access_count, newMIB.min_access_count);
+    max_access_count = Max(max_access_count, newMIB.max_access_count);
+
+    total_size += newMIB.total_size;
+    min_size = Min(min_size, newMIB.min_size);
+    max_size = Max(max_size, newMIB.max_size);
+
+    total_lifetime += newMIB.total_lifetime;
+    min_lifetime = Min(min_lifetime, newMIB.min_lifetime);
+    max_lifetime = Max(max_lifetime, newMIB.max_lifetime);
+
+    // We know newMIB was deallocated later, so just need to check if it was
+    // allocated before last one deallocated.
+    num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
+    alloc_timestamp = newMIB.alloc_timestamp;
+    dealloc_timestamp = newMIB.dealloc_timestamp;
+
+    num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
+    num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
+    alloc_cpu_id = newMIB.alloc_cpu_id;
+    dealloc_cpu_id = newMIB.dealloc_cpu_id;
+  }
+};
+
+static u32 AccessCount = 0;
+static u32 MissCount = 0;
+
+struct SetEntry {
+  SetEntry() : id(0), MIB() {}
+  bool Empty() { return id == 0; }
+  void Print() {
+    CHECK(!Empty());
+    MIB.Print(id);
+  }
+  // The stack id
+  u64 id;
+  MemInfoBlock MIB;
+};
+
+struct CacheSet {
+  enum { kSetSize = 4 };
+
+  void PrintAll() {
+    for (int i = 0; i < kSetSize; i++) {
+      if (Entries[i].Empty())
+        continue;
+      Entries[i].Print();
+    }
+  }
+  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
+    AccessCount++;
+    SetAccessCount++;
+
+    for (int i = 0; i < kSetSize; i++) {
+      auto id = Entries[i].id;
+      // Check if this is a hit or an empty entry. Since we always move any
+      // filled locations to the front of the array (see below), we don't need
+      // to look after finding the first empty entry.
+      if (id == new_id || !id) {
+        if (id == 0) {
+          Entries[i].id = new_id;
+          Entries[i].MIB = newMIB;
+        } else {
+          Entries[i].MIB.Merge(newMIB);
+        }
+        // Assuming some id locality, we try to swap the matching entry
+        // into the first set position.
+        if (i != 0) {
+          auto tmp = Entries[0];
+          Entries[0] = Entries[i];
+          Entries[i] = tmp;
+        }
+        return;
+      }
+    }
+
+    // Miss
+    MissCount++;
+    SetMissCount++;
+
+    // We try to find the entries with the lowest alloc count to be evicted:
+    int min_idx = 0;
+    u64 min_count = Entries[0].MIB.alloc_count;
+    for (int i = 1; i < kSetSize; i++) {
+      CHECK(!Entries[i].Empty());
+      if (Entries[i].MIB.alloc_count < min_count) {
+        min_idx = i;
+        min_count = Entries[i].MIB.alloc_count;
+      }
+    }
+
+    // Print the evicted entry profile information
+    if (!flags()->print_terse)
+      Printf("Evicted:\n");
+    Entries[min_idx].Print();
+
+    // Similar to the hit case, put new MIB in first set position.
+    if (min_idx != 0)
+      Entries[min_idx] = Entries[0];
+    Entries[0].id = new_id;
+    Entries[0].MIB = newMIB;
+  }
+
+  void PrintMissRate(int i) {
+    u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0;
+    Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount,
+           SetAccessCount, p / 100, p % 100);
+  }
+
+  SetEntry Entries[kSetSize];
+  u32 SetAccessCount = 0;
+  u32 SetMissCount = 0;
+};
+
+struct MemInfoBlockCache {
+  MemInfoBlockCache() {
+    if (common_flags()->print_module_map)
+      DumpProcessMap();
+    if (flags()->print_terse)
+      MemInfoBlock::printHeader();
+    Sets =
+        (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
+    Constructed = true;
+  }
+
+  ~MemInfoBlockCache() { free(Sets); }
+
+  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
+    u64 hv = new_id;
+
+    // Use mod method where number of entries should be a prime close to power
+    // of 2.
+    hv %= flags()->mem_info_cache_entries;
+
+    return Sets[hv].insertOrMerge(new_id, newMIB);
+  }
+
+  void PrintAll() {
+    for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
+      Sets[i].PrintAll();
+    }
+  }
+
+  void PrintMissRate() {
+    if (!flags()->print_mem_info_cache_miss_rate)
+      return;
+    u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
+    Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount,
+           p / 100, p % 100);
+    if (flags()->print_mem_info_cache_miss_rate_details)
+      for (int i = 0; i < flags()->mem_info_cache_entries; i++)
+        Sets[i].PrintMissRate(i);
+  }
+
+  CacheSet *Sets;
+  // Flag set when the Sets have been allocated, in case a deallocation is
+  // called very early, before the static init of the Allocator (and therefore
+  // of this table) has completed.
+  bool Constructed = false;
+};
+
+// Accumulates the access count from the shadow for the given pointer and size.
+u64 GetShadowCount(uptr p, u32 size) {
+  u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
+  u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
+  u64 count = 0;
+  for (; shadow <= shadow_end; shadow++)
+    count += *shadow;
+  return count;
+}
+
+// Clears the shadow counters (when memory is allocated).
+void ClearShadow(uptr addr, uptr size) {
+  CHECK(AddrIsAlignedByGranularity(addr));
+  CHECK(AddrIsInMem(addr));
+  CHECK(AddrIsAlignedByGranularity(addr + size));
+  CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
+  CHECK(REAL(memset));
+  uptr shadow_beg = MEM_TO_SHADOW(addr);
+  uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
+  if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
+    REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
+  } else {
+    uptr page_size = GetPageSizeCached();
+    uptr page_beg = RoundUpTo(shadow_beg, page_size);
+    uptr page_end = RoundDownTo(shadow_end, page_size);
+
+    if (page_beg >= page_end) {
+      REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
+    } else {
+      if (page_beg != shadow_beg) {
+        REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
+      }
+      if (page_end != shadow_end) {
+        REAL(memset)((void *)page_end, 0, shadow_end - page_end);
+      }
+      ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
+    }
+  }
+}
+
+struct Allocator {
+  static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;
+
+  MemprofAllocator allocator;
+  StaticSpinMutex fallback_mutex;
+  AllocatorCache fallback_allocator_cache;
+
+  uptr max_user_defined_malloc_size;
+  atomic_uint8_t rss_limit_exceeded;
+
+  MemInfoBlockCache MemInfoBlockTable;
+  bool destructing;
+
+  // ------------------- Initialization ------------------------
+  explicit Allocator(LinkerInitialized) : destructing(false) {}
+
+  ~Allocator() { FinishAndPrint(); }
+
+  void FinishAndPrint() {
+    if (!flags()->print_terse)
+      Printf("Live on exit:\n");
+    allocator.ForceLock();
+    allocator.ForEachChunk(
+        [](uptr chunk, void *alloc) {
+          u64 user_requested_size;
+          MemprofChunk *m =
+              ((Allocator *)alloc)
+                  ->GetMemprofChunk((void *)chunk, user_requested_size);
+          if (!m)
+            return;
+          uptr user_beg = ((uptr)m) + kChunkHeaderSize;
+          u64 c = GetShadowCount(user_beg, user_requested_size);
+          long curtime = GetTimestamp();
+          MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
+                              m->cpu_id, GetCpuId());
+          ((Allocator *)alloc)
+              ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
+        },
+        this);
+    allocator.ForceUnlock();
+
+    destructing = true;
+    MemInfoBlockTable.PrintMissRate();
+    MemInfoBlockTable.PrintAll();
+    StackDepotPrintAll();
+  }
+
+  void InitLinkerInitialized() {
+    SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+    allocator.InitLinkerInitialized(
+        common_flags()->allocator_release_to_os_interval_ms);
+    max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
+                                       ? common_flags()->max_allocation_size_mb
+                                             << 20
+                                       : kMaxAllowedMallocSize;
+  }
+
+  bool RssLimitExceeded() {
+    return atomic_load(&rss_limit_exceeded, memory_order_relaxed);
+  }
+
+  void SetRssLimitExceeded(bool limit_exceeded) {
+    atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed);
+  }
+
+  // -------------------- Allocation/Deallocation routines ---------------
+  void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
+                 AllocType alloc_type) {
+    if (UNLIKELY(!memprof_inited))
+      MemprofInitFromRtl();
+    if (RssLimitExceeded()) {
+      if (AllocatorMayReturnNull())
+        return nullptr;
+      ReportRssLimitExceeded(stack);
+    }
+    CHECK(stack);
+    const uptr min_alignment = MEMPROF_ALIGNMENT;
+    if (alignment < min_alignment)
+      alignment = min_alignment;
+    if (size == 0) {
+      // We'd be happy to avoid allocating memory for zero-size requests, but
+      // some programs/tests depend on this behavior and assume that malloc
+      // would not return NULL even for zero-size allocations. Moreover, it
+      // looks like operator new should never return NULL, and results of
+      // consecutive "new" calls must be different even if the allocated size
+      // is zero.
+      size = 1;
+    }
+    CHECK(IsPowerOfTwo(alignment));
+    uptr rounded_size = RoundUpTo(size, alignment);
+    uptr needed_size = rounded_size + kChunkHeaderSize;
+    if (alignment > min_alignment)
+      needed_size += alignment;
+    CHECK(IsAligned(needed_size, min_alignment));
+    if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
+        size > max_user_defined_malloc_size) {
+      if (AllocatorMayReturnNull()) {
+        Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n",
+               (void *)size);
+        return nullptr;
+      }
+      uptr malloc_limit =
+          Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
+      ReportAllocationSizeTooBig(size, malloc_limit, stack);
+    }
+
+    MemprofThread *t = GetCurrentThread();
+    void *allocated;
+    if (t) {
+      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+      allocated = allocator.Allocate(cache, needed_size, 8);
+    } else {
+      SpinMutexLock l(&fallback_mutex);
+      AllocatorCache *cache = &fallback_allocator_cache;
+      allocated = allocator.Allocate(cache, needed_size, 8);
+    }
+    if (UNLIKELY(!allocated)) {
+      SetAllocatorOutOfMemory();
+      if (AllocatorMayReturnNull())
+        return nullptr;
+      ReportOutOfMemory(size, stack);
+    }
+
+    uptr alloc_beg = reinterpret_cast<uptr>(allocated);
+    uptr alloc_end = alloc_beg + needed_size;
+    uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
+    uptr user_beg = beg_plus_header;
+    if (!IsAligned(user_beg, alignment))
+      user_beg = RoundUpTo(user_beg, alignment);
+    uptr user_end = user_beg + size;
+    CHECK_LE(user_end, alloc_end);
+    uptr chunk_beg = user_beg - kChunkHeaderSize;
+    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
+    m->from_memalign = alloc_beg != chunk_beg;
+    CHECK(size);
+
+    m->cpu_id = GetCpuId();
+    m->timestamp_ms = GetTimestamp();
+    m->alloc_context_id = StackDepotPut(*stack);
+
+    uptr size_rounded_down_to_granularity =
+        RoundDownTo(size, SHADOW_GRANULARITY);
+    if (size_rounded_down_to_granularity)
+      ClearShadow(user_beg, size_rounded_down_to_granularity);
+
+    MemprofStats &thread_stats = GetCurrentThreadStats();
+    thread_stats.mallocs++;
+    thread_stats.malloced += size;
+    thread_stats.malloced_overhead += needed_size - size;
+    if (needed_size > SizeClassMap::kMaxSize)
+      thread_stats.malloc_large++;
+    else
+      thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;
+
+    void *res = reinterpret_cast<void *>(user_beg);
+    atomic_store(&m->user_requested_size, size, memory_order_release);
+    if (alloc_beg != chunk_beg) {
+      CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
+      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
+    }
+    MEMPROF_MALLOC_HOOK(res, size);
+    return res;
+  }
+
+  void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
+                  BufferedStackTrace *stack, AllocType alloc_type) {
+    uptr p = reinterpret_cast<uptr>(ptr);
+    if (p == 0)
+      return;
+
+    MEMPROF_FREE_HOOK(ptr);
+
+    uptr chunk_beg = p - kChunkHeaderSize;
+    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
+
+    u64 user_requested_size =
+        atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
+    if (memprof_inited && memprof_init_done && !destructing &&
+        MemInfoBlockTable.Constructed) {
+      u64 c = GetShadowCount(p, user_requested_size);
+      long curtime = GetTimestamp();
+
+      MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
+                          m->cpu_id, GetCpuId());
+      {
+        SpinMutexLock l(&fallback_mutex);
+        MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
+      }
+    }
+
+    MemprofStats &thread_stats = GetCurrentThreadStats();
+    thread_stats.frees++;
+    thread_stats.freed += user_requested_size;
+
+    void *alloc_beg = m->AllocBeg();
+    if (alloc_beg != m) {
+      // Clear the magic value, as allocator internals may overwrite the
+      // contents of deallocated chunk, confusing GetMemprofChunk lookup.
+      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
+    }
+
+    MemprofThread *t = GetCurrentThread();
+    if (t) {
+      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+      allocator.Deallocate(cache, alloc_beg);
+    } else {
+      SpinMutexLock l(&fallback_mutex);
+      AllocatorCache *cache = &fallback_allocator_cache;
+      allocator.Deallocate(cache, alloc_beg);
+    }
+  }
+
+  void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
+    CHECK(old_ptr && new_size);
+    uptr p = reinterpret_cast<uptr>(old_ptr);
+    uptr chunk_beg = p - kChunkHeaderSize;
+    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
+
+    MemprofStats &thread_stats = GetCurrentThreadStats();
+    thread_stats.reallocs++;
+    thread_stats.realloced += new_size;
+
+    void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
+    if (new_ptr) {
+      CHECK_NE(REAL(memcpy), nullptr);
+      uptr memcpy_size = Min(new_size, m->UsedSize());
+      REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
+      Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
+    }
+    return new_ptr;
+  }
+
+  void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
+    if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
+      if (AllocatorMayReturnNull())
+        return nullptr;
+      ReportCallocOverflow(nmemb, size, stack);
+    }
+    void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
+    // If the memory comes from the secondary allocator no need to clear it
+    // as it comes directly from mmap.
+    if (ptr && allocator.FromPrimary(ptr))
+      REAL(memset)(ptr, 0, nmemb * size);
+    return ptr;
+  }
+
+  void CommitBack(MemprofThreadLocalMallocStorage *ms,
+                  BufferedStackTrace *stack) {
+    AllocatorCache *ac = GetAllocatorCache(ms);
+    allocator.SwallowCache(ac);
+  }
+
+  // -------------------------- Chunk lookup ----------------------
+
+  // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
+  MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
+    if (!alloc_beg)
+      return nullptr;
+    MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
+    if (!p) {
+      if (!allocator.FromPrimary(alloc_beg))
+        return nullptr;
+      p = reinterpret_cast<MemprofChunk *>(alloc_beg);
+    }
+    // The size is reset to 0 on deallocation (and a min of 1 on
+    // allocation).
+    user_requested_size =
+        atomic_load(&p->user_requested_size, memory_order_acquire);
+    if (user_requested_size)
+      return p;
+    return nullptr;
+  }
+
+  MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
+    void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
+    return GetMemprofChunk(alloc_beg, user_requested_size);
+  }
+
+  uptr AllocationSize(uptr p) {
+    u64 user_requested_size;
+    MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
+    if (!m)
+      return 0;
+    if (m->Beg() != p)
+      return 0;
+    return user_requested_size;
+  }
+
+  void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); }
+
+  void PrintStats() { allocator.PrintStats(); }
+
+  void ForceLock() {
+    allocator.ForceLock();
+    fallback_mutex.Lock();
+  }
+
+  void ForceUnlock() {
+    fallback_mutex.Unlock();
+    allocator.ForceUnlock();
+  }
+};
+
+static Allocator instance(LINKER_INITIALIZED);
+
+static MemprofAllocator &get_allocator() { return instance.allocator; }
+
+void InitializeAllocator() { instance.InitLinkerInitialized(); }
+
+void MemprofThreadLocalMallocStorage::CommitBack() {
+  GET_STACK_TRACE_MALLOC;
+  instance.CommitBack(this, &stack);
+}
+
+void PrintInternalAllocatorStats() { instance.PrintStats(); }
+
+void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
+  instance.Deallocate(ptr, 0, 0, stack, alloc_type);
+}
+
+void memprof_delete(void *ptr, uptr size, uptr alignment,
+                    BufferedStackTrace *stack, AllocType alloc_type) {
+  instance.Deallocate(ptr, size, alignment, stack, alloc_type);
+}
+
+void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
+  return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
+}
+
+void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
+  return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
+}
+
+void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
+                           BufferedStackTrace *stack) {
+  if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
+    errno = errno_ENOMEM;
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    ReportReallocArrayOverflow(nmemb, size, stack);
+  }
+  return memprof_realloc(p, nmemb * size, stack);
+}
+
+void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
+  if (!p)
+    return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
+  if (size == 0) {
+    if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
+      instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
+      return nullptr;
+    }
+    // Allocate a size of 1 if we shouldn't free() on Realloc to 0
+    size = 1;
+  }
+  return SetErrnoOnNull(instance.Reallocate(p, size, stack));
+}
+
+void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
+  return SetErrnoOnNull(
+      instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC));
+}
+
+void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
+  uptr PageSize = GetPageSizeCached();
+  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
+    errno = errno_ENOMEM;
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    ReportPvallocOverflow(size, stack);
+  }
+  // pvalloc(0) should allocate one page.
+  size = size ? RoundUpTo(size, PageSize) : PageSize;
+  return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC));
+}
+
+void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
+                       AllocType alloc_type) {
+  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
+    errno = errno_EINVAL;
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    ReportInvalidAllocationAlignment(alignment, stack);
+  }
+  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type));
+}
+
+void *memprof_aligned_alloc(uptr alignment, uptr size,
+                            BufferedStackTrace *stack) {
+  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
+    errno = errno_EINVAL;
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    ReportInvalidAlignedAllocAlignment(size, alignment, stack);
+  }
+  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC));
+}
+
+int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
+                           BufferedStackTrace *stack) {
+  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
+    if (AllocatorMayReturnNull())
+      return errno_EINVAL;
+    ReportInvalidPosixMemalignAlignment(alignment, stack);
+  }
+  void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
+  if (UNLIKELY(!ptr))
+    // OOM error is already taken care of by Allocate.
+    return errno_ENOMEM;
+  CHECK(IsAligned((uptr)ptr, alignment));
+  *memptr = ptr;
+  return 0;
+}
+
+uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) {
+  if (!ptr)
+    return 0;
+  uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
+  return usable_size;
+}
+
+void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) {
+  instance.SetRssLimitExceeded(limit_exceeded);
+}
+
+} // namespace __memprof
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __memprof;
+
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
+// Provide default (no-op) implementation of malloc hooks.
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr,
+                             uptr size) {
+  (void)ptr;
+  (void)size;
+}
+
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) {
+  (void)ptr;
+}
+#endif
+
+uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }
+
+int __sanitizer_get_ownership(const void *p) {
+  return memprof_malloc_usable_size(p, 0, 0) != 0;
+}
+
+uptr __sanitizer_get_allocated_size(const void *p) {
+  return memprof_malloc_usable_size(p, 0, 0);
+}

diff --git a/compiler-rt/lib/memprof/memprof_allocator.h b/compiler-rt/lib/memprof/memprof_allocator.h
new file mode 100644
index 000000000000..070b8b2f2737
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_allocator.h
@@ -0,0 +1,105 @@
+//===-- memprof_allocator.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_allocator.cpp.
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMPROF_ALLOCATOR_H
+#define MEMPROF_ALLOCATOR_H
+
+#include "memprof_flags.h"
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_list.h"
+
+#if !defined(__x86_64__)
+#error Unsupported platform
+#endif
+#if !SANITIZER_CAN_USE_ALLOCATOR64
+#error Only 64-bit allocator supported
+#endif
+
+namespace __memprof {
+
+enum AllocType {
+  FROM_MALLOC = 1, // Memory block came from malloc, calloc, realloc, etc.
+  FROM_NEW = 2,    // Memory block came from operator new.
+  FROM_NEW_BR = 3  // Memory block came from operator new [ ]
+};
+
+void InitializeAllocator();
+
+struct MemprofMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const;
+  void OnUnmap(uptr p, uptr size) const;
+};
+
+constexpr uptr kAllocatorSpace = 0x600000000000ULL;
+constexpr uptr kAllocatorSize = 0x40000000000ULL; // 4T.
+typedef DefaultSizeClassMap SizeClassMap;
+template <typename AddressSpaceViewTy>
+struct AP64 { // Allocator64 parameters. Deliberately using a short name.
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef __memprof::SizeClassMap SizeClassMap;
+  typedef MemprofMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = AddressSpaceViewTy;
+};
+
+template <typename AddressSpaceView>
+using PrimaryAllocatorASVT = SizeClassAllocator64<AP64<AddressSpaceView>>;
+using PrimaryAllocator = PrimaryAllocatorASVT<LocalAddressSpaceView>;
+
+static const uptr kNumberOfSizeClasses = SizeClassMap::kNumClasses;
+
+template <typename AddressSpaceView>
+using MemprofAllocatorASVT =
+    CombinedAllocator<PrimaryAllocatorASVT<AddressSpaceView>>;
+using MemprofAllocator = MemprofAllocatorASVT<LocalAddressSpaceView>;
+using AllocatorCache = MemprofAllocator::AllocatorCache;
+
+struct MemprofThreadLocalMallocStorage {
+  uptr quarantine_cache[16];
+  AllocatorCache allocator_cache;
+  void CommitBack();
+
+private:
+  // These objects are allocated via mmap() and are zero-initialized.
+  MemprofThreadLocalMallocStorage() {}
+};
+
+void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
+                       AllocType alloc_type);
+void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type);
+void memprof_delete(void *ptr, uptr size, uptr alignment,
+                    BufferedStackTrace *stack, AllocType alloc_type);
+
+void *memprof_malloc(uptr size, BufferedStackTrace *stack);
+void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack);
+void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack);
+void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
+                           BufferedStackTrace *stack);
+void *memprof_valloc(uptr size, BufferedStackTrace *stack);
+void *memprof_pvalloc(uptr size, BufferedStackTrace *stack);
+
+void *memprof_aligned_alloc(uptr alignment, uptr size,
+                            BufferedStackTrace *stack);
+int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
+                           BufferedStackTrace *stack);
+uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp);
+
+void PrintInternalAllocatorStats();
+void MemprofSoftRssLimitExceededCallback(bool exceeded);
+
+} // namespace __memprof
+#endif // MEMPROF_ALLOCATOR_H

diff --git a/compiler-rt/lib/memprof/memprof_descriptions.cpp b/compiler-rt/lib/memprof/memprof_descriptions.cpp
new file mode 100644
index 000000000000..ebd81d6f2f23
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_descriptions.cpp
@@ -0,0 +1,70 @@
+//===-- memprof_descriptions.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf functions for getting information about an address and/or printing
+// it.
+//===----------------------------------------------------------------------===//
+
+#include "memprof_descriptions.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
+namespace __memprof {
+
+MemprofThreadIdAndName::MemprofThreadIdAndName(MemprofThreadContext *t) {
+  Init(t->tid, t->name);
+}
+
+MemprofThreadIdAndName::MemprofThreadIdAndName(u32 tid) {
+  if (tid == kInvalidTid) {
+    Init(tid, "");
+  } else {
+    memprofThreadRegistry().CheckLocked();
+    MemprofThreadContext *t = GetThreadContextByTidLocked(tid);
+    Init(tid, t->name);
+  }
+}
+
+void MemprofThreadIdAndName::Init(u32 tid, const char *tname) {
+  int len = internal_snprintf(name, sizeof(name), "T%d", tid);
+  CHECK(((unsigned int)len) < sizeof(name));
+  if (tname[0] != '\0')
+    internal_snprintf(&name[len], sizeof(name) - len, " (%s)", tname);
+}
+
+void DescribeThread(MemprofThreadContext *context) {
+  CHECK(context);
+  memprofThreadRegistry().CheckLocked();
+  // No need to announce the main thread.
+  if (context->tid == 0 || context->announced) {
+    return;
+  }
+  context->announced = true;
+  InternalScopedString str(1024);
+  str.append("Thread %s", MemprofThreadIdAndName(context).c_str());
+  if (context->parent_tid == kInvalidTid) {
+    str.append(" created by unknown thread\n");
+    Printf("%s", str.data());
+    return;
+  }
+  str.append(" created by %s here:\n",
+             MemprofThreadIdAndName(context->parent_tid).c_str());
+  Printf("%s", str.data());
+  StackDepotGet(context->stack_id).Print();
+  // Recursively describe the parent thread if needed.
+  if (flags()->print_full_thread_history) {
+    MemprofThreadContext *parent_context =
+        GetThreadContextByTidLocked(context->parent_tid);
+    DescribeThread(parent_context);
+  }
+}
+
+} // namespace __memprof

diff --git a/compiler-rt/lib/memprof/memprof_descriptions.h b/compiler-rt/lib/memprof/memprof_descriptions.h
new file mode 100644
index 000000000000..e88ea441bf9e
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_descriptions.h
@@ -0,0 +1,45 @@
+//===-- memprof_descriptions.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_descriptions.cpp.
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_DESCRIPTIONS_H
+#define MEMPROF_DESCRIPTIONS_H
+
+#include "memprof_allocator.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+
+namespace __memprof {
+
+void DescribeThread(MemprofThreadContext *context);
+inline void DescribeThread(MemprofThread *t) {
+  if (t)
+    DescribeThread(t->context());
+}
+
+class MemprofThreadIdAndName {
+public:
+  explicit MemprofThreadIdAndName(MemprofThreadContext *t);
+  explicit MemprofThreadIdAndName(u32 tid);
+
+  // Contains "T%tid (%name)" or "T%tid" if the name is empty.
+  const char *c_str() const { return &name[0]; }
+
+private:
+  void Init(u32 tid, const char *tname);
+
+  char name[128];
+};
+
+} // namespace __memprof
+
+#endif // MEMPROF_DESCRIPTIONS_H

diff --git a/compiler-rt/lib/memprof/memprof_flags.cpp b/compiler-rt/lib/memprof/memprof_flags.cpp
new file mode 100644
index 000000000000..b107ff8fa0a7
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_flags.cpp
@@ -0,0 +1,93 @@
+//===-- memprof_flags.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "memprof_flags.h"
+#include "memprof_interface_internal.h"
+#include "memprof_stack.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+
+namespace __memprof {
+
+Flags memprof_flags_dont_use_directly; // use via flags().
+
+static const char *MaybeUseMemprofDefaultOptionsCompileDefinition() {
+#ifdef MEMPROF_DEFAULT_OPTIONS
+  return SANITIZER_STRINGIFY(MEMPROF_DEFAULT_OPTIONS);
+#else
+  return "";
+#endif
+}
+
+void Flags::SetDefaults() {
+#define MEMPROF_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "memprof_flags.inc"
+#undef MEMPROF_FLAG
+}
+
+static void RegisterMemprofFlags(FlagParser *parser, Flags *f) {
+#define MEMPROF_FLAG(Type, Name, DefaultValue, Description)                    \
+  RegisterFlag(parser, #Name, Description, &f->Name);
+#include "memprof_flags.inc"
+#undef MEMPROF_FLAG
+}
+
+void InitializeFlags() {
+  // Set the default values and prepare for parsing MemProf and common flags.
+  SetCommonFlagsDefaults();
+  {
+    CommonFlags cf;
+    cf.CopyFrom(*common_flags());
+    cf.external_symbolizer_path = GetEnv("MEMPROF_SYMBOLIZER_PATH");
+    cf.malloc_context_size = kDefaultMallocContextSize;
+    cf.intercept_tls_get_addr = true;
+    cf.exitcode = 1;
+    OverrideCommonFlags(cf);
+  }
+  Flags *f = flags();
+  f->SetDefaults();
+
+  FlagParser memprof_parser;
+  RegisterMemprofFlags(&memprof_parser, f);
+  RegisterCommonFlags(&memprof_parser);
+
+  // Override from MemProf compile definition.
+  const char *memprof_compile_def =
+      MaybeUseMemprofDefaultOptionsCompileDefinition();
+  memprof_parser.ParseString(memprof_compile_def);
+
+  // Override from user-specified string.
+  const char *memprof_default_options = __memprof_default_options();
+  memprof_parser.ParseString(memprof_default_options);
+
+  // Override from command line.
+  memprof_parser.ParseStringFromEnv("MEMPROF_OPTIONS");
+
+  InitializeCommonFlags();
+
+  if (Verbosity())
+    ReportUnrecognizedFlags();
+
+  if (common_flags()->help) {
+    memprof_parser.PrintFlagDescriptions();
+  }
+
+  CHECK_LE((uptr)common_flags()->malloc_context_size, kStackTraceMax);
+}
+
+} // namespace __memprof
+
+SANITIZER_INTERFACE_WEAK_DEF(const char *, __memprof_default_options, void) {
+  return "";
+}

diff --git a/compiler-rt/lib/memprof/memprof_flags.h b/compiler-rt/lib/memprof/memprof_flags.h
new file mode 100644
index 000000000000..2f2b628653dc
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_flags.h
@@ -0,0 +1,45 @@
+//===-- memprof_flags.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMPROF_FLAGS_H
+#define MEMPROF_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+// MemProf flag values can be defined in four ways:
+// 1) initialized with default values at startup.
+// 2) overridden during compilation of MemProf runtime by providing
+//    compile definition MEMPROF_DEFAULT_OPTIONS.
+// 3) overridden from string returned by user-specified function
+//    __memprof_default_options().
+// 4) overridden from env variable MEMPROF_OPTIONS.
+
+namespace __memprof {
+
+struct Flags {
+#define MEMPROF_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "memprof_flags.inc"
+#undef MEMPROF_FLAG
+
+  void SetDefaults();
+};
+
+extern Flags memprof_flags_dont_use_directly;
+inline Flags *flags() { return &memprof_flags_dont_use_directly; }
+
+void InitializeFlags();
+
+} // namespace __memprof
+
+#endif // MEMPROF_FLAGS_H

diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc
new file mode 100644
index 000000000000..035fd15b9288
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_flags.inc
@@ -0,0 +1,49 @@
+//===-- memprof_flags.inc --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MemProf runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_FLAG
+#error "Define MEMPROF_FLAG prior to including this file!"
+#endif
+
+// MEMPROF_FLAG(Type, Name, DefaultValue, Description)
+// See COMMON_FLAG in sanitizer_flags.inc for more details.
+
+MEMPROF_FLAG(bool, unmap_shadow_on_exit, false,
+             "If set, explicitly unmaps the (huge) shadow at exit.")
+MEMPROF_FLAG(bool, protect_shadow_gap, true, "If set, mprotect the shadow gap")
+MEMPROF_FLAG(bool, print_legend, true, "Print the legend for the shadow bytes.")
+MEMPROF_FLAG(bool, atexit, false,
+             "If set, prints MemProf exit stats even after program terminates "
+             "successfully.")
+MEMPROF_FLAG(
+    bool, print_full_thread_history, true,
+    "If set, prints thread creation stacks for the threads involved in the "
+    "report and their ancestors up to the main thread.")
+
+MEMPROF_FLAG(bool, halt_on_error, true,
+             "Crash the program after printing the first error report "
+             "(WARNING: USE AT YOUR OWN RISK!)")
+MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
+             "realloc(p, 0) is equivalent to free(p) by default (Same as the "
+             "POSIX standard). If set to false, realloc(p, 0) will return a "
+             "pointer to an allocated space which can not be used.")
+MEMPROF_FLAG(bool, print_terse, false,
+             "If set, prints memory profile in a terse format.")
+
+MEMPROF_FLAG(
+    int, mem_info_cache_entries, 16381,
+    "Size in entries of the mem info block cache, should be closest prime"
+    " number to a power of two for best hashing.")
+MEMPROF_FLAG(bool, print_mem_info_cache_miss_rate, false,
+             "If set, prints the miss rate of the mem info block cache.")
+MEMPROF_FLAG(
+    bool, print_mem_info_cache_miss_rate_details, false,
+    "If set, prints detailed miss rates of the mem info block cache sets.")

diff --git a/compiler-rt/lib/memprof/memprof_init_version.h b/compiler-rt/lib/memprof/memprof_init_version.h
new file mode 100644
index 000000000000..26c68f78677a
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_init_version.h
@@ -0,0 +1,26 @@
+//===-- memprof_init_version.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// This header defines a versioned __memprof_init function to be called at the
+// startup of the instrumented program.
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_INIT_VERSION_H
+#define MEMPROF_INIT_VERSION_H
+
+#include "sanitizer_common/sanitizer_platform.h"
+
+extern "C" {
+// Every time the Memprof ABI changes we also change the version number in the
+// __memprof_init function name.  Objects built with incompatible Memprof ABI
+// versions will not link with run-time.
+#define __memprof_version_mismatch_check __memprof_version_mismatch_check_v1
+}
+
+#endif // MEMPROF_INIT_VERSION_H

diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp
new file mode 100644
index 000000000000..caa629b9c474
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -0,0 +1,366 @@
+//===-- memprof_interceptors.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Intercept various libc functions.
+//===----------------------------------------------------------------------===//
+
+#include "memprof_interceptors.h"
+#include "memprof_allocator.h"
+#include "memprof_internal.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "memprof_stats.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_posix.h"
+
+namespace __memprof {
+
+#define MEMPROF_READ_STRING(s, n) MEMPROF_READ_RANGE((s), (n))
+
+static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) {
+#if SANITIZER_INTERCEPT_STRNLEN
+  if (REAL(strnlen)) {
+    return REAL(strnlen)(s, maxlen);
+  }
+#endif
+  return internal_strnlen(s, maxlen);
+}
+
+void SetThreadName(const char *name) {
+  MemprofThread *t = GetCurrentThread();
+  if (t)
+    memprofThreadRegistry().SetThreadName(t->tid(), name);
+}
+
+int OnExit() {
+  // FIXME: ask frontend whether we need to return failure.
+  return 0;
+}
+
+} // namespace __memprof
+
+// ---------------------- Wrappers ---------------- {{{1
+using namespace __memprof;
+
+DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr)
+DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
+
+#define MEMPROF_INTERCEPTOR_ENTER(ctx, func)                                   \
+  ctx = 0;                                                                     \
+  (void)ctx;
+
+#define COMMON_INTERCEPT_FUNCTION(name) MEMPROF_INTERCEPT_FUNC(name)
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                               \
+  MEMPROF_INTERCEPT_FUNC_VER(name, ver)
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size)                         \
+  MEMPROF_WRITE_RANGE(ptr, size)
+#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size)                          \
+  MEMPROF_READ_RANGE(ptr, size)
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...)                               \
+  MEMPROF_INTERCEPTOR_ENTER(ctx, func);                                        \
+  do {                                                                         \
+    if (memprof_init_is_running)                                               \
+      return REAL(func)(__VA_ARGS__);                                          \
+    ENSURE_MEMPROF_INITED();                                                   \
+  } while (false)
+#define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path)                              \
+  do {                                                                         \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd)                                 \
+  do {                                                                         \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd)                                 \
+  do {                                                                         \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, newfd)                    \
+  do {                                                                         \
+  } while (false)
+#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) SetThreadName(name)
+// Should be memprofThreadRegistry().SetThreadNameByUserId(thread, name)
+// But memprof does not remember UserId's for threads (pthread_t);
+// and remembers all ever existed threads, so the linear search by UserId
+// can be slow.
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name)                 \
+  do {                                                                         \
+  } while (false)
+#define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name)
+#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag)                           \
+  do {                                                                         \
+    CheckNoDeepBind(filename, flag);                                           \
+  } while (false)
+#define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
+#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
+#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
+#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!memprof_inited)
+#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end)                           \
+  if (MemprofThread *t = GetCurrentThread()) {                                 \
+    *begin = t->tls_begin();                                                   \
+    *end = t->tls_end();                                                       \
+  } else {                                                                     \
+    *begin = *end = 0;                                                         \
+  }
+
+#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size)                   \
+  do {                                                                         \
+    MEMPROF_INTERCEPTOR_ENTER(ctx, memmove);                                   \
+    MEMPROF_MEMMOVE_IMPL(to, from, size);                                      \
+  } while (false)
+
+#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size)                    \
+  do {                                                                         \
+    MEMPROF_INTERCEPTOR_ENTER(ctx, memcpy);                                    \
+    MEMPROF_MEMCPY_IMPL(to, from, size);                                       \
+  } while (false)
+
+#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size)                    \
+  do {                                                                         \
+    MEMPROF_INTERCEPTOR_ENTER(ctx, memset);                                    \
+    MEMPROF_MEMSET_IMPL(block, c, size);                                       \
+  } while (false)
+
+#include "sanitizer_common/sanitizer_common_interceptors.inc"
+
+#define COMMON_SYSCALL_PRE_READ_RANGE(p, s) MEMPROF_READ_RANGE(p, s)
+#define COMMON_SYSCALL_PRE_WRITE_RANGE(p, s) MEMPROF_WRITE_RANGE(p, s)
+#define COMMON_SYSCALL_POST_READ_RANGE(p, s)                                   \
+  do {                                                                         \
+    (void)(p);                                                                 \
+    (void)(s);                                                                 \
+  } while (false)
+#define COMMON_SYSCALL_POST_WRITE_RANGE(p, s)                                  \
+  do {                                                                         \
+    (void)(p);                                                                 \
+    (void)(s);                                                                 \
+  } while (false)
+#include "sanitizer_common/sanitizer_common_syscalls.inc"
+
+struct ThreadStartParam {
+  atomic_uintptr_t t;
+  atomic_uintptr_t is_registered;
+};
+
+static thread_return_t THREAD_CALLING_CONV memprof_thread_start(void *arg) {
+  ThreadStartParam *param = reinterpret_cast<ThreadStartParam *>(arg);
+  MemprofThread *t = nullptr;
+  while ((t = reinterpret_cast<MemprofThread *>(
+              atomic_load(&param->t, memory_order_acquire))) == nullptr)
+    internal_sched_yield();
+  SetCurrentThread(t);
+  return t->ThreadStart(GetTid(), &param->is_registered);
+}
+
+INTERCEPTOR(int, pthread_create, void *thread, void *attr,
+            void *(*start_routine)(void *), void *arg) {
+  EnsureMainThreadIDIsCorrect();
+  GET_STACK_TRACE_THREAD;
+  int detached = 0;
+  if (attr)
+    REAL(pthread_attr_getdetachstate)(attr, &detached);
+  ThreadStartParam param;
+  atomic_store(&param.t, 0, memory_order_relaxed);
+  atomic_store(&param.is_registered, 0, memory_order_relaxed);
+  int result;
+  {
+    // Ignore all allocations made by pthread_create: thread stack/TLS may be
+    // stored by pthread for future reuse even after thread destruction, and
+    // the linked list it's stored in doesn't even hold valid pointers to the
+    // objects, the latter are calculated by obscure pointer arithmetic.
+    result = REAL(pthread_create)(thread, attr, memprof_thread_start, &param);
+  }
+  if (result == 0) {
+    u32 current_tid = GetCurrentTidOrInvalid();
+    MemprofThread *t = MemprofThread::Create(start_routine, arg, current_tid,
+                                             &stack, detached);
+    atomic_store(&param.t, reinterpret_cast<uptr>(t), memory_order_release);
+    // Wait until the MemprofThread object is initialized and the
+    // ThreadRegistry entry is in "started" state.
+    while (atomic_load(&param.is_registered, memory_order_acquire) == 0)
+      internal_sched_yield();
+  }
+  return result;
+}
+
+INTERCEPTOR(int, pthread_join, void *t, void **arg) {
+  return real_pthread_join(t, arg);
+}
+
+DEFINE_REAL_PTHREAD_FUNCTIONS
+
+INTERCEPTOR(char *, index, const char *string, int c)
+ALIAS(WRAPPER_NAME(strchr));
+
+// For both strcat() and strncat() we need to check the validity of |to|
+// argument irrespective of the |from| length.
+INTERCEPTOR(char *, strcat, char *to, const char *from) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strcat);
+  ENSURE_MEMPROF_INITED();
+  uptr from_length = REAL(strlen)(from);
+  MEMPROF_READ_RANGE(from, from_length + 1);
+  uptr to_length = REAL(strlen)(to);
+  MEMPROF_READ_STRING(to, to_length);
+  MEMPROF_WRITE_RANGE(to + to_length, from_length + 1);
+  return REAL(strcat)(to, from);
+}
+
+INTERCEPTOR(char *, strncat, char *to, const char *from, uptr size) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strncat);
+  ENSURE_MEMPROF_INITED();
+  uptr from_length = MaybeRealStrnlen(from, size);
+  uptr copy_length = Min(size, from_length + 1);
+  MEMPROF_READ_RANGE(from, copy_length);
+  uptr to_length = REAL(strlen)(to);
+  MEMPROF_READ_STRING(to, to_length);
+  MEMPROF_WRITE_RANGE(to + to_length, from_length + 1);
+  return REAL(strncat)(to, from, size);
+}
+
+INTERCEPTOR(char *, strcpy, char *to, const char *from) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strcpy);
+  if (memprof_init_is_running) {
+    return REAL(strcpy)(to, from);
+  }
+  ENSURE_MEMPROF_INITED();
+  uptr from_size = REAL(strlen)(from) + 1;
+  MEMPROF_READ_RANGE(from, from_size);
+  MEMPROF_WRITE_RANGE(to, from_size);
+  return REAL(strcpy)(to, from);
+}
+
+INTERCEPTOR(char *, strdup, const char *s) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strdup);
+  if (UNLIKELY(!memprof_inited))
+    return internal_strdup(s);
+  ENSURE_MEMPROF_INITED();
+  uptr length = REAL(strlen)(s);
+  MEMPROF_READ_RANGE(s, length + 1);
+  GET_STACK_TRACE_MALLOC;
+  void *new_mem = memprof_malloc(length + 1, &stack);
+  REAL(memcpy)(new_mem, s, length + 1);
+  return reinterpret_cast<char *>(new_mem);
+}
+
+INTERCEPTOR(char *, __strdup, const char *s) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strdup);
+  if (UNLIKELY(!memprof_inited))
+    return internal_strdup(s);
+  ENSURE_MEMPROF_INITED();
+  uptr length = REAL(strlen)(s);
+  MEMPROF_READ_RANGE(s, length + 1);
+  GET_STACK_TRACE_MALLOC;
+  void *new_mem = memprof_malloc(length + 1, &stack);
+  REAL(memcpy)(new_mem, s, length + 1);
+  return reinterpret_cast<char *>(new_mem);
+}
+
+INTERCEPTOR(char *, strncpy, char *to, const char *from, uptr size) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strncpy);
+  ENSURE_MEMPROF_INITED();
+  uptr from_size = Min(size, MaybeRealStrnlen(from, size) + 1);
+  MEMPROF_READ_RANGE(from, from_size);
+  MEMPROF_WRITE_RANGE(to, size);
+  return REAL(strncpy)(to, from, size);
+}
+
+INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strtol);
+  ENSURE_MEMPROF_INITED();
+  char *real_endptr;
+  long result = REAL(strtol)(nptr, &real_endptr, base);
+  StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
+  return result;
+}
+
+INTERCEPTOR(int, atoi, const char *nptr) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, atoi);
+  ENSURE_MEMPROF_INITED();
+  char *real_endptr;
+  // "man atoi" tells that behavior of atoi(nptr) is the same as
+  // strtol(nptr, 0, 10), i.e. it sets errno to ERANGE if the
+  // parsed integer can't be stored in *long* type (even if it's
+  // different from int). So, we just imitate this behavior.
+  int result = REAL(strtol)(nptr, &real_endptr, 10);
+  FixRealStrtolEndptr(nptr, &real_endptr);
+  MEMPROF_READ_STRING(nptr, (real_endptr - nptr) + 1);
+  return result;
+}
+
+INTERCEPTOR(long, atol, const char *nptr) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, atol);
+  ENSURE_MEMPROF_INITED();
+  char *real_endptr;
+  long result = REAL(strtol)(nptr, &real_endptr, 10);
+  FixRealStrtolEndptr(nptr, &real_endptr);
+  MEMPROF_READ_STRING(nptr, (real_endptr - nptr) + 1);
+  return result;
+}
+
+INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, strtoll);
+  ENSURE_MEMPROF_INITED();
+  char *real_endptr;
+  long long result = REAL(strtoll)(nptr, &real_endptr, base);
+  StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
+  return result;
+}
+
+INTERCEPTOR(long long, atoll, const char *nptr) {
+  void *ctx;
+  MEMPROF_INTERCEPTOR_ENTER(ctx, atoll);
+  ENSURE_MEMPROF_INITED();
+  char *real_endptr;
+  long long result = REAL(strtoll)(nptr, &real_endptr, 10);
+  FixRealStrtolEndptr(nptr, &real_endptr);
+  MEMPROF_READ_STRING(nptr, (real_endptr - nptr) + 1);
+  return result;
+}
+
+// ---------------------- InitializeMemprofInterceptors ---------------- {{{1
+namespace __memprof {
+void InitializeMemprofInterceptors() {
+  static bool was_called_once;
+  CHECK(!was_called_once);
+  was_called_once = true;
+  InitializeCommonInterceptors();
+
+  // Intercept str* functions.
+  MEMPROF_INTERCEPT_FUNC(strcat);
+  MEMPROF_INTERCEPT_FUNC(strcpy);
+  MEMPROF_INTERCEPT_FUNC(strncat);
+  MEMPROF_INTERCEPT_FUNC(strncpy);
+  MEMPROF_INTERCEPT_FUNC(strdup);
+  MEMPROF_INTERCEPT_FUNC(__strdup);
+  MEMPROF_INTERCEPT_FUNC(index);
+
+  MEMPROF_INTERCEPT_FUNC(atoi);
+  MEMPROF_INTERCEPT_FUNC(atol);
+  MEMPROF_INTERCEPT_FUNC(strtol);
+  MEMPROF_INTERCEPT_FUNC(atoll);
+  MEMPROF_INTERCEPT_FUNC(strtoll);
+
+  // Intercept threading-related functions
+  MEMPROF_INTERCEPT_FUNC(pthread_create);
+  MEMPROF_INTERCEPT_FUNC(pthread_join);
+
+  InitializePlatformInterceptors();
+
+  VReport(1, "MemProfiler: libc interceptors initialized\n");
+}
+
+} // namespace __memprof

diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h
new file mode 100644
index 000000000000..b6a4fa411254
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_interceptors.h
@@ -0,0 +1,54 @@
+//===-- memprof_interceptors.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_interceptors.cpp
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_INTERCEPTORS_H
+#define MEMPROF_INTERCEPTORS_H
+
+#include "interception/interception.h"
+#include "memprof_interceptors_memintrinsics.h"
+#include "memprof_internal.h"
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
+
+namespace __memprof {
+
+void InitializeMemprofInterceptors();
+void InitializePlatformInterceptors();
+
+#define ENSURE_MEMPROF_INITED()                                                \
+  do {                                                                         \
+    CHECK(!memprof_init_is_running);                                           \
+    if (UNLIKELY(!memprof_inited)) {                                           \
+      MemprofInitFromRtl();                                                    \
+    }                                                                          \
+  } while (0)
+
+} // namespace __memprof
+
+DECLARE_REAL(int, memcmp, const void *a1, const void *a2, uptr size)
+DECLARE_REAL(char *, strchr, const char *str, int c)
+DECLARE_REAL(SIZE_T, strlen, const char *s)
+DECLARE_REAL(char *, strncpy, char *to, const char *from, uptr size)
+DECLARE_REAL(uptr, strnlen, const char *s, uptr maxlen)
+DECLARE_REAL(char *, strstr, const char *s1, const char *s2)
+
+#define MEMPROF_INTERCEPT_FUNC(name)                                           \
+  do {                                                                         \
+    if (!INTERCEPT_FUNCTION(name))                                             \
+      VReport(1, "MemProfiler: failed to intercept '%s'\n'", #name);           \
+  } while (0)
+#define MEMPROF_INTERCEPT_FUNC_VER(name, ver)                                  \
+  do {                                                                         \
+    if (!INTERCEPT_FUNCTION_VER(name, ver))                                    \
+      VReport(1, "MemProfiler: failed to intercept '%s@@%s'\n", #name, #ver);  \
+  } while (0)
+
+#endif // MEMPROF_INTERCEPTORS_H

diff --git a/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp
new file mode 100644
index 000000000000..4eb409362b57
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp
@@ -0,0 +1,29 @@
+//===-- memprof_interceptors_memintrinsics.cpp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf versions of memcpy, memmove, and memset.
+//===---------------------------------------------------------------------===//
+
+#include "memprof_interceptors_memintrinsics.h"
+#include "memprof_stack.h"
+
+using namespace __memprof;
+
+void *__memprof_memcpy(void *to, const void *from, uptr size) {
+  MEMPROF_MEMCPY_IMPL(to, from, size);
+}
+
+void *__memprof_memset(void *block, int c, uptr size) {
+  MEMPROF_MEMSET_IMPL(block, c, size);
+}
+
+void *__memprof_memmove(void *to, const void *from, uptr size) {
+  MEMPROF_MEMMOVE_IMPL(to, from, size);
+}

diff --git a/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h
new file mode 100644
index 000000000000..348461d55c41
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h
@@ -0,0 +1,79 @@
+//===-- memprof_interceptors_memintrinsics.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_interceptors_memintrinsics.cpp
+//===---------------------------------------------------------------------===//
+#ifndef MEMPROF_MEMINTRIN_H
+#define MEMPROF_MEMINTRIN_H
+
+#include "interception/interception.h"
+#include "memprof_interface_internal.h"
+#include "memprof_internal.h"
+#include "memprof_mapping.h"
+
+DECLARE_REAL(void *, memcpy, void *to, const void *from, uptr size)
+DECLARE_REAL(void *, memset, void *block, int c, uptr size)
+
+namespace __memprof {
+
+// We implement ACCESS_MEMORY_RANGE, MEMPROF_READ_RANGE,
+// and MEMPROF_WRITE_RANGE as macro instead of function so
+// that no extra frames are created, and stack trace contains
+// relevant information only.
+#define ACCESS_MEMORY_RANGE(offset, size)                                      \
+  do {                                                                         \
+    __memprof_record_access_range(offset, size);                               \
+  } while (0)
+
+// memcpy is called during __memprof_init() from the internals of printf(...).
+// We do not treat memcpy with to==from as a bug.
+// See http://llvm.org/bugs/show_bug.cgi?id=11763.
+#define MEMPROF_MEMCPY_IMPL(to, from, size)                                    \
+  do {                                                                         \
+    if (UNLIKELY(!memprof_inited))                                             \
+      return internal_memcpy(to, from, size);                                  \
+    if (memprof_init_is_running) {                                             \
+      return REAL(memcpy)(to, from, size);                                     \
+    }                                                                          \
+    ENSURE_MEMPROF_INITED();                                                   \
+    MEMPROF_READ_RANGE(from, size);                                            \
+    MEMPROF_WRITE_RANGE(to, size);                                             \
+    return REAL(memcpy)(to, from, size);                                       \
+  } while (0)
+
+// memset is called inside Printf.
+#define MEMPROF_MEMSET_IMPL(block, c, size)                                    \
+  do {                                                                         \
+    if (UNLIKELY(!memprof_inited))                                             \
+      return internal_memset(block, c, size);                                  \
+    if (memprof_init_is_running) {                                             \
+      return REAL(memset)(block, c, size);                                     \
+    }                                                                          \
+    ENSURE_MEMPROF_INITED();                                                   \
+    MEMPROF_WRITE_RANGE(block, size);                                          \
+    return REAL(memset)(block, c, size);                                       \
+  } while (0)
+
+#define MEMPROF_MEMMOVE_IMPL(to, from, size)                                   \
+  do {                                                                         \
+    if (UNLIKELY(!memprof_inited))                                             \
+      return internal_memmove(to, from, size);                                 \
+    ENSURE_MEMPROF_INITED();                                                   \
+    MEMPROF_READ_RANGE(from, size);                                            \
+    MEMPROF_WRITE_RANGE(to, size);                                             \
+    return internal_memmove(to, from, size);                                   \
+  } while (0)
+
+#define MEMPROF_READ_RANGE(offset, size) ACCESS_MEMORY_RANGE(offset, size)
+#define MEMPROF_WRITE_RANGE(offset, size) ACCESS_MEMORY_RANGE(offset, size)
+
+} // namespace __memprof
+
+#endif // MEMPROF_MEMINTRIN_H

diff --git a/compiler-rt/lib/memprof/memprof_interface_internal.h b/compiler-rt/lib/memprof/memprof_interface_internal.h
new file mode 100644
index 000000000000..ea410f5cab06
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_interface_internal.h
@@ -0,0 +1,60 @@
+//===-- memprof_interface_internal.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// This header declares the MemProfiler runtime interface functions.
+// The runtime library has to define these functions so the instrumented program
+// could call them.
+//
+// See also include/sanitizer/memprof_interface.h
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_INTERFACE_INTERNAL_H
+#define MEMPROF_INTERFACE_INTERNAL_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+#include "memprof_init_version.h"
+
+using __sanitizer::u32;
+using __sanitizer::u64;
+using __sanitizer::uptr;
+
+extern "C" {
+// This function should be called at the very beginning of the process,
+// before any instrumented code is executed and before any call to malloc.
+SANITIZER_INTERFACE_ATTRIBUTE void __memprof_init();
+SANITIZER_INTERFACE_ATTRIBUTE void __memprof_preinit();
+SANITIZER_INTERFACE_ATTRIBUTE void __memprof_version_mismatch_check_v1();
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __memprof_record_access(void const volatile *addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __memprof_record_access_range(void const volatile *addr, uptr size);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __memprof_print_accumulated_stats();
+
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__memprof_default_options();
+
+SANITIZER_INTERFACE_ATTRIBUTE
+extern uptr __memprof_shadow_memory_dynamic_address;
+
+SANITIZER_INTERFACE_ATTRIBUTE void __memprof_load(uptr p);
+SANITIZER_INTERFACE_ATTRIBUTE void __memprof_store(uptr p);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__memprof_memcpy(void *dst, const void *src, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__memprof_memset(void *s, int c, uptr n);
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__memprof_memmove(void *dest, const void *src, uptr n);
+} // extern "C"
+
+#endif // MEMPROF_INTERFACE_INTERNAL_H

diff --git a/compiler-rt/lib/memprof/memprof_internal.h b/compiler-rt/lib/memprof/memprof_internal.h
new file mode 100644
index 000000000000..8d227887fe15
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_internal.h
@@ -0,0 +1,104 @@
+//===-- memprof_internal.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header which defines various general utilities.
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_INTERNAL_H
+#define MEMPROF_INTERNAL_H
+
+#include "memprof_flags.h"
+#include "memprof_interface_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#error "The MemProfiler run-time should not be instrumented by MemProfiler"
+#endif
+
+// Build-time configuration options.
+
+// If set, memprof will intercept C++ exception api call(s).
+#ifndef MEMPROF_HAS_EXCEPTIONS
+#define MEMPROF_HAS_EXCEPTIONS 1
+#endif
+
+#ifndef MEMPROF_DYNAMIC
+#ifdef PIC
+#define MEMPROF_DYNAMIC 1
+#else
+#define MEMPROF_DYNAMIC 0
+#endif
+#endif
+
+// All internal functions in memprof reside inside the __memprof namespace
+// to avoid namespace collisions with the user programs.
+// Separate namespace also makes it simpler to distinguish the memprof
+// run-time functions from the instrumented user code in a profile.
+namespace __memprof {
+
+class MemprofThread;
+using __sanitizer::StackTrace;
+
+void MemprofInitFromRtl();
+
+// memprof_rtl.cpp
+void PrintAddressSpaceLayout();
+
+// memprof_shadow_setup.cpp
+void InitializeShadowMemory();
+
+// memprof_malloc_linux.cpp
+void ReplaceSystemMalloc();
+
+// memprof_linux.cpp
+uptr FindDynamicShadowStart();
+void *MemprofDoesNotSupportStaticLinkage();
+
+// memprof_thread.cpp
+MemprofThread *CreateMainThread();
+
+void ReadContextStack(void *context, uptr *stack, uptr *ssize);
+
+// Wrapper for TLS/TSD.
+void TSDInit(void (*destructor)(void *tsd));
+void *TSDGet();
+void TSDSet(void *tsd);
+void PlatformTSDDtor(void *tsd);
+
+void *MemprofDlSymNext(const char *sym);
+
+// Add convenient macro for interface functions that may be represented as
+// weak hooks.
+#define MEMPROF_MALLOC_HOOK(ptr, size)                                         \
+  do {                                                                         \
+    if (&__sanitizer_malloc_hook)                                              \
+      __sanitizer_malloc_hook(ptr, size);                                      \
+    RunMallocHooks(ptr, size);                                                 \
+  } while (false)
+#define MEMPROF_FREE_HOOK(ptr)                                                 \
+  do {                                                                         \
+    if (&__sanitizer_free_hook)                                                \
+      __sanitizer_free_hook(ptr);                                              \
+    RunFreeHooks(ptr);                                                         \
+  } while (false)
+
+extern int memprof_inited;
+extern int memprof_timestamp_inited;
+extern int memprof_init_done;
+// Used to avoid infinite recursion in __memprof_init().
+extern bool memprof_init_is_running;
+extern void (*death_callback)(void);
+extern long memprof_init_timestamp_s;
+
+} // namespace __memprof
+
+#endif // MEMPROF_INTERNAL_H

diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp
new file mode 100644
index 000000000000..4846e988f58e
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_linux.cpp
@@ -0,0 +1,78 @@
+//===-- memprof_linux.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Linux-specific details.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if !SANITIZER_LINUX
+#error Unsupported OS
+#endif
+
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_freebsd.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <link.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <unwind.h>
+
+typedef enum {
+  MEMPROF_RT_VERSION_UNDEFINED = 0,
+  MEMPROF_RT_VERSION_DYNAMIC,
+  MEMPROF_RT_VERSION_STATIC,
+} memprof_rt_version_t;
+
+// FIXME: perhaps also store abi version here?
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+memprof_rt_version_t __memprof_rt_version;
+}
+
+namespace __memprof {
+
+// Linux needs no extra interceptor or exception-handler setup beyond the
+// common interceptor initialization.
+void InitializePlatformInterceptors() {}
+void InitializePlatformExceptionHandlers() {}
+
+void *MemprofDoesNotSupportStaticLinkage() {
+  // This will fail to link with -static.
+  return &_DYNAMIC; // defined in link.h
+}
+
+// Picks a base address for the dynamically-placed shadow sized to cover
+// [0, kHighMemEnd] at the configured SHADOW_SCALE.
+uptr FindDynamicShadowStart() {
+  uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd);
+  return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE,
+                          /*min_shadow_base_alignment*/ 0, kHighMemEnd);
+}
+
+// Extracts the alternate-stack base and size from a ucontext_t.
+void ReadContextStack(void *context, uptr *stack, uptr *ssize) {
+  ucontext_t *ucp = (ucontext_t *)context;
+  *stack = (uptr)ucp->uc_stack.ss_sp;
+  *ssize = ucp->uc_stack.ss_size;
+}
+
+// Resolves the next definition of `sym` in library search order (i.e. the
+// real libc function behind our interceptor).
+void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); }
+
+} // namespace __memprof

diff  --git a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
new file mode 100644
index 000000000000..c7330f4619a1
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
@@ -0,0 +1,226 @@
+//===-- memprof_malloc_linux.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Linux-specific malloc interception.
+// We simply define functions like malloc, free, realloc, etc.
+// They will replace the corresponding libc functions automagically.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if !SANITIZER_LINUX
+#error Unsupported OS
+#endif
+
+#include "memprof_allocator.h"
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "memprof_stack.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_errno.h"
+#include "sanitizer_common/sanitizer_tls_get_addr.h"
+
+// ---------------------- Replacement functions ---------------- {{{1
+using namespace __memprof;
+
+static uptr allocated_for_dlsym;
+static uptr last_dlsym_alloc_size_in_words;
+static const uptr kDlsymAllocPoolSize = 1024;
+static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
+
+// Returns true if `ptr` points into the static bump-allocator pool used to
+// satisfy allocations made by dlsym before the real allocator is ready.
+static inline bool IsInDlsymAllocPool(const void *ptr) {
+  uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
+  return off < allocated_for_dlsym * sizeof(alloc_memory_for_dlsym[0]);
+}
+
+// Bump-allocates `size_in_bytes` (rounded up to whole words) from the static
+// pool. CHECKs (aborts) on pool exhaustion; memory is never individually
+// freed except for the last-allocation special case below.
+static void *AllocateFromLocalPool(uptr size_in_bytes) {
+  uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
+  void *mem = (void *)&alloc_memory_for_dlsym[allocated_for_dlsym];
+  last_dlsym_alloc_size_in_words = size_in_words;
+  allocated_for_dlsym += size_in_words;
+  CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
+  return mem;
+}
+
+// Frees only if `ptr` was the most recent pool allocation, by rewinding the
+// bump pointer; any other pointer is silently leaked (pool is tiny and
+// pre-init only).
+static void DeallocateFromLocalPool(const void *ptr) {
+  // Hack: since glibc 2.27 dlsym no longer uses stack-allocated memory to store
+  // error messages and instead uses malloc followed by free. To avoid pool
+  // exhaustion due to long object filenames, handle that special case here.
+  uptr prev_offset = allocated_for_dlsym - last_dlsym_alloc_size_in_words;
+  void *prev_mem = (void *)&alloc_memory_for_dlsym[prev_offset];
+  if (prev_mem == ptr) {
+    REAL(memset)(prev_mem, 0, last_dlsym_alloc_size_in_words * kWordSize);
+    allocated_for_dlsym = prev_offset;
+    last_dlsym_alloc_size_in_words = 0;
+  }
+}
+
+// posix_memalign served from the static pool (pre-init path). Returns 0 on
+// success or an errno value (EINVAL/ENOMEM), matching the posix_memalign
+// contract of returning the error rather than setting errno.
+static int PosixMemalignFromLocalPool(void **memptr, uptr alignment,
+                                      uptr size_in_bytes) {
+  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment)))
+    return errno_EINVAL;
+
+  CHECK(alignment >= kWordSize);
+
+  uptr addr = (uptr)&alloc_memory_for_dlsym[allocated_for_dlsym];
+  uptr aligned_addr = RoundUpTo(addr, alignment);
+  uptr aligned_size = RoundUpTo(size_in_bytes, kWordSize);
+
+  uptr *end_mem = (uptr *)(aligned_addr + aligned_size);
+  uptr allocated = end_mem - alloc_memory_for_dlsym;
+  if (allocated >= kDlsymAllocPoolSize)
+    return errno_ENOMEM;
+
+  // Note: the alignment padding between `addr` and `aligned_addr` is simply
+  // skipped, not reclaimed.
+  allocated_for_dlsym = allocated;
+  *memptr = (void *)aligned_addr;
+  return 0;
+}
+
+// While __memprof_init is running, allocations may originate from dlsym
+// itself and must be served from the static pool.
+static inline bool MaybeInDlsym() { return memprof_init_is_running; }
+
+static inline bool UseLocalPool() { return MaybeInDlsym(); }
+
+// realloc of a pointer that lives in the static pool: allocate new storage
+// (from the pool or, once initialized, the real allocator) and copy over.
+static void *ReallocFromLocalPool(void *ptr, uptr size) {
+  const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
+  // Clamp the copy so we never read past the end of the pool; the original
+  // allocation size is not tracked per-pointer.
+  const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
+  void *new_ptr;
+  if (UNLIKELY(UseLocalPool())) {
+    new_ptr = AllocateFromLocalPool(size);
+  } else {
+    ENSURE_MEMPROF_INITED();
+    GET_STACK_TRACE_MALLOC;
+    new_ptr = memprof_malloc(size, &stack);
+  }
+  internal_memcpy(new_ptr, ptr, copy_size);
+  return new_ptr;
+}
+
+// free(): pool pointers are handled by the pool; everything else goes to the
+// memprof allocator with the caller's stack trace.
+INTERCEPTOR(void, free, void *ptr) {
+  GET_STACK_TRACE_FREE;
+  if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
+    DeallocateFromLocalPool(ptr);
+    return;
+  }
+  memprof_free(ptr, &stack, FROM_MALLOC);
+}
+
+#if SANITIZER_INTERCEPT_CFREE
+INTERCEPTOR(void, cfree, void *ptr) {
+  GET_STACK_TRACE_FREE;
+  if (UNLIKELY(IsInDlsymAllocPool(ptr)))
+    return;
+  memprof_free(ptr, &stack, FROM_MALLOC);
+}
+#endif // SANITIZER_INTERCEPT_CFREE
+
+INTERCEPTOR(void *, malloc, uptr size) {
+  if (UNLIKELY(UseLocalPool()))
+    // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
+    return AllocateFromLocalPool(size);
+  ENSURE_MEMPROF_INITED();
+  GET_STACK_TRACE_MALLOC;
+  return memprof_malloc(size, &stack);
+}
+
+INTERCEPTOR(void *, calloc, uptr nmemb, uptr size) {
+  if (UNLIKELY(UseLocalPool()))
+    // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
+    // NOTE(review): this pre-init path multiplies nmemb * size without an
+    // overflow check (the regular memprof_calloc path presumably checks —
+    // TODO confirm); the pool CHECK bounds the damage in practice.
+    return AllocateFromLocalPool(nmemb * size);
+  ENSURE_MEMPROF_INITED();
+  GET_STACK_TRACE_MALLOC;
+  return memprof_calloc(nmemb, size, &stack);
+}
+
+INTERCEPTOR(void *, realloc, void *ptr, uptr size) {
+  // Order matters: a pool pointer must be migrated even after init finishes.
+  if (UNLIKELY(IsInDlsymAllocPool(ptr)))
+    return ReallocFromLocalPool(ptr, size);
+  if (UNLIKELY(UseLocalPool()))
+    return AllocateFromLocalPool(size);
+  ENSURE_MEMPROF_INITED();
+  GET_STACK_TRACE_MALLOC;
+  return memprof_realloc(ptr, size, &stack);
+}
+
+#if SANITIZER_INTERCEPT_REALLOCARRAY
+INTERCEPTOR(void *, reallocarray, void *ptr, uptr nmemb, uptr size) {
+  ENSURE_MEMPROF_INITED();
+  GET_STACK_TRACE_MALLOC;
+  return memprof_reallocarray(ptr, nmemb, size, &stack);
+}
+#endif // SANITIZER_INTERCEPT_REALLOCARRAY
+
+#if SANITIZER_INTERCEPT_MEMALIGN
+INTERCEPTOR(void *, memalign, uptr boundary, uptr size) {
+  GET_STACK_TRACE_MALLOC;
+  return memprof_memalign(boundary, size, &stack, FROM_MALLOC);
+}
+
+INTERCEPTOR(void *, __libc_memalign, uptr boundary, uptr size) {
+  GET_STACK_TRACE_MALLOC;
+  void *res = memprof_memalign(boundary, size, &stack, FROM_MALLOC);
+  // __libc_memalign is used by the dynamic linker for TLS; record the block
+  // so the DTLS machinery can track it.
+  DTLS_on_libc_memalign(res, size);
+  return res;
+}
+#endif // SANITIZER_INTERCEPT_MEMALIGN
+
+#if SANITIZER_INTERCEPT_ALIGNED_ALLOC
+INTERCEPTOR(void *, aligned_alloc, uptr boundary, uptr size) {
+  GET_STACK_TRACE_MALLOC;
+  return memprof_aligned_alloc(boundary, size, &stack);
+}
+#endif // SANITIZER_INTERCEPT_ALIGNED_ALLOC
+
+INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
+  GET_CURRENT_PC_BP_SP;
+  (void)sp;
+  return memprof_malloc_usable_size(ptr, pc, bp);
+}
+
+#if SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
+// We avoid including malloc.h for portability reasons.
+// man mallinfo says the fields are "long", but the implementation uses int.
+// It doesn't matter much -- we just need to make sure that the libc's mallinfo
+// is not called.
+struct fake_mallinfo {
+  int x[10];
+};
+
+// mallinfo/mallopt are stubbed out: we report zeroed stats and accept no
+// tuning, since the underlying allocator is ours, not libc's.
+INTERCEPTOR(struct fake_mallinfo, mallinfo, void) {
+  struct fake_mallinfo res;
+  REAL(memset)(&res, 0, sizeof(res));
+  return res;
+}
+
+INTERCEPTOR(int, mallopt, int cmd, int value) { return 0; }
+#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
+
+INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
+  if (UNLIKELY(UseLocalPool()))
+    return PosixMemalignFromLocalPool(memptr, alignment, size);
+  GET_STACK_TRACE_MALLOC;
+  return memprof_posix_memalign(memptr, alignment, size, &stack);
+}
+
+INTERCEPTOR(void *, valloc, uptr size) {
+  GET_STACK_TRACE_MALLOC;
+  return memprof_valloc(size, &stack);
+}
+
+#if SANITIZER_INTERCEPT_PVALLOC
+INTERCEPTOR(void *, pvalloc, uptr size) {
+  GET_STACK_TRACE_MALLOC;
+  return memprof_pvalloc(size, &stack);
+}
+#endif // SANITIZER_INTERCEPT_PVALLOC
+
+// malloc_stats() dumps the memprof profile instead of libc heap stats.
+INTERCEPTOR(void, malloc_stats, void) { __memprof_print_accumulated_stats(); }
+
+namespace __memprof {
+// Nothing to do on Linux: the INTERCEPTOR definitions above replace the libc
+// entry points at link/load time.
+void ReplaceSystemMalloc() {}
+} // namespace __memprof

diff  --git a/compiler-rt/lib/memprof/memprof_mapping.h b/compiler-rt/lib/memprof/memprof_mapping.h
new file mode 100644
index 000000000000..f48018b1a8f2
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mapping.h
@@ -0,0 +1,113 @@
+//===-- memprof_mapping.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Defines MemProf memory mapping.
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_MAPPING_H
+#define MEMPROF_MAPPING_H
+
+#include "memprof_internal.h"
+
+static const u64 kDefaultShadowScale = 3;
+#define SHADOW_SCALE kDefaultShadowScale
+
+#define SHADOW_OFFSET __memprof_shadow_memory_dynamic_address
+
+#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
+#define MEMPROF_ALIGNMENT 32
+
+namespace __memprof {
+
+extern uptr kHighMemEnd; // Initialized in __memprof_init.
+
+} // namespace __memprof
+
+#define SHADOW_ENTRY_SIZE 8
+
+// Size of memory block mapped to a single shadow location
+#define MEM_GRANULARITY 64ULL
+
+#define SHADOW_MASK ~(MEM_GRANULARITY - 1)
+
+#define MEM_TO_SHADOW(mem)                                                     \
+  (((mem & SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
+
+#define kLowMemBeg 0
+#define kLowMemEnd (SHADOW_OFFSET ? SHADOW_OFFSET - 1 : 0)
+
+#define kLowShadowBeg SHADOW_OFFSET
+#define kLowShadowEnd (MEM_TO_SHADOW(kLowMemEnd) + SHADOW_ENTRY_SIZE - 1)
+
+#define kHighMemBeg (MEM_TO_SHADOW(kHighMemEnd) + 1 + SHADOW_ENTRY_SIZE - 1)
+
+#define kHighShadowBeg MEM_TO_SHADOW(kHighMemBeg)
+#define kHighShadowEnd (MEM_TO_SHADOW(kHighMemEnd) + SHADOW_ENTRY_SIZE - 1)
+
+// With the zero shadow base we can not actually map pages starting from 0.
+// This constant is somewhat arbitrary.
+#define kZeroBaseShadowStart 0
+#define kZeroBaseMaxShadowStart (1 << 18)
+
+#define kShadowGapBeg (kLowShadowEnd ? kLowShadowEnd + 1 : kZeroBaseShadowStart)
+#define kShadowGapEnd (kHighShadowBeg - 1)
+
+namespace __memprof {
+
+// Shadow is 1/8 the size of application memory (SHADOW_SCALE == 3).
+inline uptr MemToShadowSize(uptr size) { return size >> SHADOW_SCALE; }
+inline bool AddrIsInLowMem(uptr a) { return a <= kLowMemEnd; }
+
+inline bool AddrIsInLowShadow(uptr a) {
+  return a >= kLowShadowBeg && a <= kLowShadowEnd;
+}
+
+inline bool AddrIsInHighMem(uptr a) {
+  return kHighMemBeg && a >= kHighMemBeg && a <= kHighMemEnd;
+}
+
+inline bool AddrIsInHighShadow(uptr a) {
+  return kHighMemBeg && a >= kHighShadowBeg && a <= kHighShadowEnd;
+}
+
+inline bool AddrIsInShadowGap(uptr a) {
+  // In zero-based shadow mode we treat addresses near zero as addresses
+  // in shadow gap as well.
+  if (SHADOW_OFFSET == 0)
+    return a <= kShadowGapEnd;
+  return a >= kShadowGapBeg && a <= kShadowGapEnd;
+}
+
+inline bool AddrIsInMem(uptr a) {
+  return AddrIsInLowMem(a) || AddrIsInHighMem(a) ||
+         (flags()->protect_shadow_gap == 0 && AddrIsInShadowGap(a));
+}
+
+inline uptr MemToShadow(uptr p) {
+  CHECK(AddrIsInMem(p));
+  return MEM_TO_SHADOW(p);
+}
+
+inline bool AddrIsInShadow(uptr a) {
+  return AddrIsInLowShadow(a) || AddrIsInHighShadow(a);
+}
+
+inline bool AddrIsAlignedByGranularity(uptr a) {
+  return (a & (SHADOW_GRANULARITY - 1)) == 0;
+}
+
+// Bumps the 64-bit access counter in the shadow cell covering `a`. This is
+// the hot path invoked by every instrumented load/store.
+inline void RecordAccess(uptr a) {
+  // If we use a different shadow size then the type below needs adjustment.
+  CHECK_EQ(SHADOW_ENTRY_SIZE, 8);
+  u64 *shadow_address = (u64 *)MEM_TO_SHADOW(a);
+  (*shadow_address)++;
+}
+
+} // namespace __memprof
+
+#endif // MEMPROF_MAPPING_H

diff  --git a/compiler-rt/lib/memprof/memprof_new_delete.cpp b/compiler-rt/lib/memprof/memprof_new_delete.cpp
new file mode 100644
index 000000000000..cae5de301367
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_new_delete.cpp
@@ -0,0 +1,145 @@
+//===-- memprof_interceptors.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Interceptors for operators new and delete.
+//===----------------------------------------------------------------------===//
+
+#include "memprof_allocator.h"
+#include "memprof_internal.h"
+#include "memprof_stack.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+
+#include "interception/interception.h"
+
+#include <stddef.h>
+
+#define CXX_OPERATOR_ATTRIBUTE INTERCEPTOR_ATTRIBUTE
+
+using namespace __memprof;
+
+// Fake std::nothrow_t and std::align_val_t to avoid including <new>.
+namespace std {
+struct nothrow_t {};
+enum class align_val_t : size_t {};
+} // namespace std
+
+// Shared bodies for all operator new overloads: allocate via memprof_memalign
+// (alignment 0 = default), and for the throwing forms report OOM on failure.
+#define OPERATOR_NEW_BODY(type, nothrow)                                       \
+  GET_STACK_TRACE_MALLOC;                                                      \
+  void *res = memprof_memalign(0, size, &stack, type);                         \
+  if (!nothrow && UNLIKELY(!res))                                              \
+    ReportOutOfMemory(size, &stack);                                           \
+  return res;
+#define OPERATOR_NEW_BODY_ALIGN(type, nothrow)                                 \
+  GET_STACK_TRACE_MALLOC;                                                      \
+  void *res = memprof_memalign((uptr)align, size, &stack, type);               \
+  if (!nothrow && UNLIKELY(!res))                                              \
+    ReportOutOfMemory(size, &stack);                                           \
+  return res;
+
+// FROM_NEW vs FROM_NEW_BR distinguishes scalar new from array new[].
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size) {
+  OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size) {
+  OPERATOR_NEW_BODY(FROM_NEW_BR, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::nothrow_t const &) {
+  OPERATOR_NEW_BODY(FROM_NEW, true /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::nothrow_t const &) {
+  OPERATOR_NEW_BODY(FROM_NEW_BR, true /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, false /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align,
+                   std::nothrow_t const &) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW, true /*nothrow*/);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align,
+                     std::nothrow_t const &) {
+  OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, true /*nothrow*/);
+}
+
+// Shared bodies for operator delete overloads; size/alignment of 0 means
+// "unknown" and is passed through to memprof_delete for validation.
+#define OPERATOR_DELETE_BODY(type)                                             \
+  GET_STACK_TRACE_FREE;                                                        \
+  memprof_delete(ptr, 0, 0, &stack, type);
+
+#define OPERATOR_DELETE_BODY_SIZE(type)                                        \
+  GET_STACK_TRACE_FREE;                                                        \
+  memprof_delete(ptr, size, 0, &stack, type);
+
+#define OPERATOR_DELETE_BODY_ALIGN(type)                                       \
+  GET_STACK_TRACE_FREE;                                                        \
+  memprof_delete(ptr, 0, static_cast<uptr>(align), &stack, type);
+
+#define OPERATOR_DELETE_BODY_SIZE_ALIGN(type)                                  \
+  GET_STACK_TRACE_FREE;                                                        \
+  memprof_delete(ptr, size, static_cast<uptr>(align), &stack, type);
+
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr)NOEXCEPT { OPERATOR_DELETE_BODY(FROM_NEW); }
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr) NOEXCEPT {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size)NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size) NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t align)NOEXCEPT {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t align,
+                     std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t align,
+                       std::nothrow_t const &) {
+  OPERATOR_DELETE_BODY_ALIGN(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size, std::align_val_t align)NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size,
+                       std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW_BR);
+}

diff  --git a/compiler-rt/lib/memprof/memprof_posix.cpp b/compiler-rt/lib/memprof/memprof_posix.cpp
new file mode 100644
index 000000000000..ee0821b85102
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_posix.cpp
@@ -0,0 +1,55 @@
+//===-- memprof_posix.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Posix-specific details.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if !SANITIZER_POSIX
+#error Only Posix supported
+#endif
+
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+#include <pthread.h>
+
+namespace __memprof {
+
+// ---------------------- TSD ---------------- {{{1
+// pthread-key based thread-specific data used to hang the per-thread
+// MemprofThreadContext off each thread.
+
+static pthread_key_t tsd_key;
+static bool tsd_key_inited = false;
+void TSDInit(void (*destructor)(void *tsd)) {
+  CHECK(!tsd_key_inited);
+  tsd_key_inited = true;
+  CHECK_EQ(0, pthread_key_create(&tsd_key, destructor));
+}
+
+void *TSDGet() {
+  CHECK(tsd_key_inited);
+  return pthread_getspecific(tsd_key);
+}
+
+void TSDSet(void *tsd) {
+  CHECK(tsd_key_inited);
+  pthread_setspecific(tsd_key, tsd);
+}
+
+// pthread runs TSD destructors up to PTHREAD_DESTRUCTOR_ITERATIONS times;
+// re-arm the key until the iteration budget is spent, then really destroy.
+void PlatformTSDDtor(void *tsd) {
+  MemprofThreadContext *context = (MemprofThreadContext *)tsd;
+  if (context->destructor_iterations > 1) {
+    context->destructor_iterations--;
+    CHECK_EQ(0, pthread_setspecific(tsd_key, tsd));
+    return;
+  }
+  MemprofThread::TSDDtor(tsd);
+}
+} // namespace __memprof

diff  --git a/compiler-rt/lib/memprof/memprof_preinit.cpp b/compiler-rt/lib/memprof/memprof_preinit.cpp
new file mode 100644
index 000000000000..7092cd4ee556
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_preinit.cpp
@@ -0,0 +1,23 @@
+//===-- memprof_preinit.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Call __memprof_init at the very early stage of process startup.
+//===----------------------------------------------------------------------===//
+#include "memprof_internal.h"
+
+using namespace __memprof;
+
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+// The symbol is called __local_memprof_preinit, because it's not intended to
+// be exported. This code linked into the main executable when -fmemory-profile
+// is in the link flags. It can only use exported interface functions.
+__attribute__((section(".preinit_array"),
+               used)) void (*__local_memprof_preinit)(void) = __memprof_preinit;
+#endif

diff  --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp
new file mode 100644
index 000000000000..61270406e48c
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_rtl.cpp
@@ -0,0 +1,312 @@
+//===-- memprof_rtl.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Main file of the MemProf run-time library.
+//===----------------------------------------------------------------------===//
+
+#include "memprof_allocator.h"
+#include "memprof_interceptors.h"
+#include "memprof_interface_internal.h"
+#include "memprof_internal.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "memprof_stats.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+#include <ctime>
+
+uptr __memprof_shadow_memory_dynamic_address; // Global interface symbol.
+
+namespace __memprof {
+
+// Die callback installed into sanitizer_common: optionally dumps the module
+// map and unmaps the shadow before process death.
+static void MemprofDie() {
+  static atomic_uint32_t num_calls;
+  if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) != 0) {
+    // Don't die twice - run a busy loop.
+    while (1) {
+    }
+  }
+  if (common_flags()->print_module_map >= 1)
+    DumpProcessMap();
+  if (flags()->unmap_shadow_on_exit) {
+    if (kHighShadowEnd)
+      UnmapOrDie((void *)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
+  }
+}
+
+// CHECK-failure callback: report file/line/condition with both operand
+// values, print a stack trace once, then die.
+static void MemprofCheckFailed(const char *file, int line, const char *cond,
+                               u64 v1, u64 v2) {
+  Report("MemProfiler CHECK failed: %s:%d \"%s\" (0x%zx, 0x%zx)\n", file, line,
+         cond, (uptr)v1, (uptr)v2);
+
+  // Print a stack trace the first time we come here. Otherwise, we probably
+  // failed a CHECK during symbolization.
+  static atomic_uint32_t num_calls;
+  if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) == 0) {
+    PRINT_CURRENT_STACK_CHECK();
+  }
+
+  Die();
+}
+
+// -------------------------- Globals --------------------- {{{1
+int memprof_inited;
+int memprof_init_done;
+bool memprof_init_is_running;
+int memprof_timestamp_inited;
+long memprof_init_timestamp_s;
+
+uptr kHighMemEnd;
+
+// -------------------------- Run-time entry ------------------- {{{1
+// exported functions
+
+#define MEMPROF_MEMORY_ACCESS_CALLBACK_BODY() __memprof::RecordAccess(addr);
+
+#define MEMPROF_MEMORY_ACCESS_CALLBACK(type)                                   \
+  extern "C" NOINLINE INTERFACE_ATTRIBUTE void __memprof_##type(uptr addr) {   \
+    MEMPROF_MEMORY_ACCESS_CALLBACK_BODY()                                      \
+  }
+
+MEMPROF_MEMORY_ACCESS_CALLBACK(load)
+MEMPROF_MEMORY_ACCESS_CALLBACK(store)
+
+// Force the linker to keep the symbols for various MemProf interface
+// functions. We want to keep those in the executable in order to let the
+// instrumented dynamic libraries access the symbol even if it is not used by
+// the executable itself. This should help if the build system is removing dead
+// code at link time.
+static NOINLINE void force_interface_symbols() {
+  volatile int fake_condition = 0; // prevent dead condition elimination.
+  // clang-format off
+  switch (fake_condition) {
+    case 1: __memprof_record_access(nullptr); break;
+    case 2: __memprof_record_access_range(nullptr, 0); break;
+  }
+  // clang-format on
+}
+
+// atexit handler (registered when flags()->atexit is set) that dumps the
+// accumulated profile on normal process exit.
+static void memprof_atexit() {
+  Printf("MemProfiler exit stats:\n");
+  __memprof_print_accumulated_stats();
+}
+
+static void InitializeHighMemEnd() {
+  kHighMemEnd = GetMaxUserVirtualAddress();
+  // Increase kHighMemEnd to make sure it's properly
+  // aligned together with kHighMemBeg:
+  kHighMemEnd |= (GetMmapGranularity() << SHADOW_SCALE) - 1;
+}
+
+// Verbose-mode dump of the memory/shadow layout computed from the
+// memprof_mapping.h macros.
+void PrintAddressSpaceLayout() {
+  if (kHighMemBeg) {
+    Printf("|| `[%p, %p]` || HighMem    ||\n", (void *)kHighMemBeg,
+           (void *)kHighMemEnd);
+    Printf("|| `[%p, %p]` || HighShadow ||\n", (void *)kHighShadowBeg,
+           (void *)kHighShadowEnd);
+  }
+  Printf("|| `[%p, %p]` || ShadowGap  ||\n", (void *)kShadowGapBeg,
+         (void *)kShadowGapEnd);
+  if (kLowShadowBeg) {
+    Printf("|| `[%p, %p]` || LowShadow  ||\n", (void *)kLowShadowBeg,
+           (void *)kLowShadowEnd);
+    Printf("|| `[%p, %p]` || LowMem     ||\n", (void *)kLowMemBeg,
+           (void *)kLowMemEnd);
+  }
+  Printf("MemToShadow(shadow): %p %p", (void *)MEM_TO_SHADOW(kLowShadowBeg),
+         (void *)MEM_TO_SHADOW(kLowShadowEnd));
+  if (kHighMemBeg) {
+    Printf(" %p %p", (void *)MEM_TO_SHADOW(kHighShadowBeg),
+           (void *)MEM_TO_SHADOW(kHighShadowEnd));
+  }
+  Printf("\n");
+  Printf("malloc_context_size=%zu\n",
+         (uptr)common_flags()->malloc_context_size);
+
+  Printf("SHADOW_SCALE: %d\n", (int)SHADOW_SCALE);
+  Printf("SHADOW_GRANULARITY: %d\n", (int)SHADOW_GRANULARITY);
+  Printf("SHADOW_OFFSET: 0x%zx\n", (uptr)SHADOW_OFFSET);
+  CHECK(SHADOW_SCALE >= 3 && SHADOW_SCALE <= 7);
+}
+
+// Runs at dynamic-initialization time (before main) to start the optional
+// background thread and hook the soft RSS limit callback.
+static bool UNUSED __local_memprof_dyninit = [] {
+  MaybeStartBackgroudThread();
+  SetSoftRssLimitExceededCallback(MemprofSoftRssLimitExceededCallback);
+
+  return false;
+}();
+
+// One-time runtime initialization: flags, shadow, allocator, interceptors,
+// TSD, and the main thread. Idempotent; guarded against recursive entry.
+static void MemprofInitInternal() {
+  if (LIKELY(memprof_inited))
+    return;
+  SanitizerToolName = "MemProfiler";
+  CHECK(!memprof_init_is_running && "MemProf init calls itself!");
+  memprof_init_is_running = true;
+
+  CacheBinaryName();
+
+  // Initialize flags. This must be done early, because most of the
+  // initialization steps look at flags().
+  InitializeFlags();
+
+  AvoidCVE_2016_2143();
+
+  SetMallocContextSize(common_flags()->malloc_context_size);
+
+  InitializeHighMemEnd();
+
+  // Make sure we are not statically linked.
+  MemprofDoesNotSupportStaticLinkage();
+
+  // Install tool-specific callbacks in sanitizer_common.
+  AddDieCallback(MemprofDie);
+  SetCheckFailedCallback(MemprofCheckFailed);
+
+  __sanitizer_set_report_path(common_flags()->log_path);
+
+  __sanitizer::InitializePlatformEarly();
+
+  // Re-exec ourselves if we need to set additional env or command line args.
+  MaybeReexec();
+
+  // Setup internal allocator callback.
+  SetLowLevelAllocateMinAlignment(SHADOW_GRANULARITY);
+
+  InitializeMemprofInterceptors();
+  CheckASLR();
+
+  ReplaceSystemMalloc();
+
+  DisableCoreDumperIfNecessary();
+
+  InitializeShadowMemory();
+
+  TSDInit(PlatformTSDDtor);
+
+  InitializeAllocator();
+
+  // On Linux MemprofThread::ThreadStart() calls malloc() that's why
+  // memprof_inited should be set to 1 prior to initializing the threads.
+  memprof_inited = 1;
+  memprof_init_is_running = false;
+
+  if (flags()->atexit)
+    Atexit(memprof_atexit);
+
+  InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
+
+  // interceptors
+  InitTlsSize();
+
+  // Create main thread.
+  MemprofThread *main_thread = CreateMainThread();
+  CHECK_EQ(0, main_thread->tid());
+  force_interface_symbols(); // no-op.
+  SanitizerInitializeUnwinder();
+
+  Symbolizer::LateInitialize();
+
+  VReport(1, "MemProfiler Init done\n");
+
+  memprof_init_done = 1;
+}
+
+// Records the process start timestamp (seconds, UTC) once; used to relate
+// profile data to wall-clock time.
+void MemprofInitTime() {
+  if (LIKELY(memprof_timestamp_inited))
+    return;
+  timespec ts;
+  timespec_get(&ts, TIME_UTC);
+  memprof_init_timestamp_s = ts.tv_sec;
+  memprof_timestamp_inited = 1;
+}
+
+// Initialize as requested from some part of MemProf runtime library
+// (interceptors, allocator, etc).
+void MemprofInitFromRtl() { MemprofInitInternal(); }
+
+#if MEMPROF_DYNAMIC
+// Initialize runtime in case it's LD_PRELOAD-ed into uninstrumented executable
+// (and thus normal initializers from .preinit_array or modules haven't run).
+
+class MemprofInitializer {
+public:
+  MemprofInitializer() { MemprofInitFromRtl(); }
+};
+
+static MemprofInitializer memprof_initializer;
+#endif // MEMPROF_DYNAMIC
+
+} // namespace __memprof
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __memprof;
+
+// Initialize as requested from instrumented application code.
+// Initialize as requested from instrumented application code.
+void __memprof_init() {
+  MemprofInitTime();
+  MemprofInitInternal();
+}
+
+// Entry point wired into .preinit_array (see memprof_preinit.cpp).
+void __memprof_preinit() { MemprofInitInternal(); }
+
+// Referenced by instrumented binaries purely as a link-time ABI check;
+// intentionally empty.
+void __memprof_version_mismatch_check_v1() {}
+
+void __memprof_record_access(void const volatile *addr) {
+  __memprof::RecordAccess((uptr)addr);
+}
+
+// We only record the access on the first location in the range,
+// since we will later accumulate the access counts across the
+// full allocation, and we don't want to inflate the hotness from
+// a memory intrinsic on a large range of memory.
+// TODO: Should we do something else so we can better track utilization?
+void __memprof_record_access_range(void const volatile *addr,
+                                   UNUSED uptr size) {
+  __memprof::RecordAccess((uptr)addr);
+}
+
+// Unaligned load/store helpers used by instrumented code: record the access,
+// then perform it through the unaligned (uuNN) type.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE u16
+__sanitizer_unaligned_load16(const uu16 *p) {
+  __memprof_record_access(p);
+  return *p;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE u32
+__sanitizer_unaligned_load32(const uu32 *p) {
+  __memprof_record_access(p);
+  return *p;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64
+__sanitizer_unaligned_load64(const uu64 *p) {
+  __memprof_record_access(p);
+  return *p;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__sanitizer_unaligned_store16(uu16 *p, u16 x) {
+  __memprof_record_access(p);
+  *p = x;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__sanitizer_unaligned_store32(uu32 *p, u32 x) {
+  __memprof_record_access(p);
+  *p = x;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__sanitizer_unaligned_store64(uu64 *p, u64 x) {
+  __memprof_record_access(p);
+  *p = x;
+}

diff  --git a/compiler-rt/lib/memprof/memprof_shadow_setup.cpp b/compiler-rt/lib/memprof/memprof_shadow_setup.cpp
new file mode 100644
index 000000000000..e7832f656ee8
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_shadow_setup.cpp
@@ -0,0 +1,62 @@
+//===-- memprof_shadow_setup.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Set up the shadow memory.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+
+#include "memprof_internal.h"
+#include "memprof_mapping.h"
+
+namespace __memprof {
+
+static void ProtectGap(uptr addr, uptr size) {
+  if (!flags()->protect_shadow_gap) {
+    // The shadow gap is unprotected, so there is a chance that someone
+    // is actually using this memory. Which means it needs a shadow...
+    uptr GapShadowBeg = RoundDownTo(MEM_TO_SHADOW(addr), GetPageSizeCached());
+    uptr GapShadowEnd =
+        RoundUpTo(MEM_TO_SHADOW(addr + size), GetPageSizeCached()) - 1;
+    if (Verbosity())
+      Printf("protect_shadow_gap=0:"
+             " not protecting shadow gap, allocating gap's shadow\n"
+             "|| `[%p, %p]` || ShadowGap's shadow ||\n",
+             GapShadowBeg, GapShadowEnd);
+    ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
+                             "unprotected gap shadow");
+    return;
+  }
+  __sanitizer::ProtectGap(addr, size, kZeroBaseShadowStart,
+                          kZeroBaseMaxShadowStart);
+}
+
+void InitializeShadowMemory() {
+  uptr shadow_start = FindDynamicShadowStart();
+  // Update the shadow memory address (potentially) used by instrumentation.
+  __memprof_shadow_memory_dynamic_address = shadow_start;
+
+  if (kLowShadowBeg)
+    shadow_start -= GetMmapGranularity();
+
+  if (Verbosity())
+    PrintAddressSpaceLayout();
+
+  // mmap the low shadow plus at least one page at the left.
+  if (kLowShadowBeg)
+    ReserveShadowMemoryRange(shadow_start, kLowShadowEnd, "low shadow");
+  // mmap the high shadow.
+  ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd, "high shadow");
+  // protect the gap.
+  ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
+  CHECK_EQ(kShadowGapEnd, kHighShadowBeg - 1);
+}
+
+} // namespace __memprof

diff --git a/compiler-rt/lib/memprof/memprof_stack.cpp b/compiler-rt/lib/memprof/memprof_stack.cpp
new file mode 100644
index 000000000000..b5beeeadafd7
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_stack.cpp
@@ -0,0 +1,59 @@
+//===-- memprof_stack.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Code for MemProf stack trace.
+//===----------------------------------------------------------------------===//
+#include "memprof_stack.h"
+#include "memprof_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+
+namespace __memprof {
+
+static atomic_uint32_t malloc_context_size;
+
+void SetMallocContextSize(u32 size) {
+  atomic_store(&malloc_context_size, size, memory_order_release);
+}
+
+u32 GetMallocContextSize() {
+  return atomic_load(&malloc_context_size, memory_order_acquire);
+}
+
+} // namespace __memprof
+
+void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
+                                                 void *context,
+                                                 bool request_fast,
+                                                 u32 max_depth) {
+  using namespace __memprof;
+  size = 0;
+  if (UNLIKELY(!memprof_inited))
+    return;
+  request_fast = StackTrace::WillUseFastUnwind(request_fast);
+  MemprofThread *t = GetCurrentThread();
+  if (request_fast) {
+    if (t) {
+      Unwind(max_depth, pc, bp, nullptr, t->stack_top(), t->stack_bottom(),
+             true);
+    }
+    return;
+  }
+  Unwind(max_depth, pc, bp, context, 0, 0, false);
+}
+
+// ------------------ Interface -------------- {{{1
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_print_stack_trace() {
+  using namespace __memprof;
+  PRINT_CURRENT_STACK();
+}
+} // extern "C"

diff --git a/compiler-rt/lib/memprof/memprof_stack.h b/compiler-rt/lib/memprof/memprof_stack.h
new file mode 100644
index 000000000000..289a61e385a2
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_stack.h
@@ -0,0 +1,75 @@
+//===-- memprof_stack.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_stack.cpp.
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMPROF_STACK_H
+#define MEMPROF_STACK_H
+
+#include "memprof_flags.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+
+namespace __memprof {
+
+static const u32 kDefaultMallocContextSize = 30;
+
+void SetMallocContextSize(u32 size);
+u32 GetMallocContextSize();
+
+} // namespace __memprof
+
+// NOTE: A Rule of thumb is to retrieve stack trace in the interceptors
+// as early as possible (in functions exposed to the user), as we generally
+// don't want stack trace to contain functions from MemProf internals.
+
+#define GET_STACK_TRACE(max_size, fast)                                        \
+  BufferedStackTrace stack;                                                    \
+  if (max_size <= 2) {                                                         \
+    stack.size = max_size;                                                     \
+    if (max_size > 0) {                                                        \
+      stack.top_frame_bp = GET_CURRENT_FRAME();                                \
+      stack.trace_buffer[0] = StackTrace::GetCurrentPc();                      \
+      if (max_size > 1)                                                        \
+        stack.trace_buffer[1] = GET_CALLER_PC();                               \
+    }                                                                          \
+  } else {                                                                     \
+    stack.Unwind(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(), nullptr,     \
+                 fast, max_size);                                              \
+  }
+
+#define GET_STACK_TRACE_FATAL_HERE                                             \
+  GET_STACK_TRACE(kStackTraceMax, common_flags()->fast_unwind_on_fatal)
+
+#define GET_STACK_TRACE_CHECK_HERE                                             \
+  GET_STACK_TRACE(kStackTraceMax, common_flags()->fast_unwind_on_check)
+
+#define GET_STACK_TRACE_THREAD GET_STACK_TRACE(kStackTraceMax, true)
+
+#define GET_STACK_TRACE_MALLOC                                                 \
+  GET_STACK_TRACE(GetMallocContextSize(), common_flags()->fast_unwind_on_malloc)
+
+#define GET_STACK_TRACE_FREE GET_STACK_TRACE_MALLOC
+
+#define PRINT_CURRENT_STACK()                                                  \
+  {                                                                            \
+    GET_STACK_TRACE_FATAL_HERE;                                                \
+    stack.Print();                                                             \
+  }
+
+#define PRINT_CURRENT_STACK_CHECK()                                            \
+  {                                                                            \
+    GET_STACK_TRACE_CHECK_HERE;                                                \
+    stack.Print();                                                             \
+  }
+
+#endif // MEMPROF_STACK_H

diff --git a/compiler-rt/lib/memprof/memprof_stats.cpp b/compiler-rt/lib/memprof/memprof_stats.cpp
new file mode 100644
index 000000000000..8a50d270dc6a
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_stats.cpp
@@ -0,0 +1,157 @@
+//===-- memprof_stats.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Code related to statistics collected by MemProfiler.
+//===----------------------------------------------------------------------===//
+#include "memprof_stats.h"
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
+namespace __memprof {
+
+MemprofStats::MemprofStats() { Clear(); }
+
+void MemprofStats::Clear() {
+  if (REAL(memset))
+    return (void)REAL(memset)(this, 0, sizeof(MemprofStats));
+  internal_memset(this, 0, sizeof(MemprofStats));
+}
+
+static void PrintMallocStatsArray(const char *prefix,
+                                  uptr (&array)[kNumberOfSizeClasses]) {
+  Printf("%s", prefix);
+  for (uptr i = 0; i < kNumberOfSizeClasses; i++) {
+    if (!array[i])
+      continue;
+    Printf("%zu:%zu; ", i, array[i]);
+  }
+  Printf("\n");
+}
+
+void MemprofStats::Print() {
+  Printf("Stats: %zuM malloced (%zuM for overhead) by %zu calls\n",
+         malloced >> 20, malloced_overhead >> 20, mallocs);
+  Printf("Stats: %zuM realloced by %zu calls\n", realloced >> 20, reallocs);
+  Printf("Stats: %zuM freed by %zu calls\n", freed >> 20, frees);
+  Printf("Stats: %zuM really freed by %zu calls\n", really_freed >> 20,
+         real_frees);
+  Printf("Stats: %zuM (%zuM-%zuM) mmaped; %zu maps, %zu unmaps\n",
+         (mmaped - munmaped) >> 20, mmaped >> 20, munmaped >> 20, mmaps,
+         munmaps);
+
+  PrintMallocStatsArray("  mallocs by size class: ", malloced_by_size);
+  Printf("Stats: malloc large: %zu\n", malloc_large);
+}
+
+void MemprofStats::MergeFrom(const MemprofStats *stats) {
+  uptr *dst_ptr = reinterpret_cast<uptr *>(this);
+  const uptr *src_ptr = reinterpret_cast<const uptr *>(stats);
+  uptr num_fields = sizeof(*this) / sizeof(uptr);
+  for (uptr i = 0; i < num_fields; i++)
+    dst_ptr[i] += src_ptr[i];
+}
+
+static BlockingMutex print_lock(LINKER_INITIALIZED);
+
+static MemprofStats unknown_thread_stats(LINKER_INITIALIZED);
+static MemprofStats dead_threads_stats(LINKER_INITIALIZED);
+static BlockingMutex dead_threads_stats_lock(LINKER_INITIALIZED);
+// Required for malloc_zone_statistics() on OS X. This can't be stored in
+// per-thread MemprofStats.
+static uptr max_malloced_memory;
+
+static void MergeThreadStats(ThreadContextBase *tctx_base, void *arg) {
+  MemprofStats *accumulated_stats = reinterpret_cast<MemprofStats *>(arg);
+  MemprofThreadContext *tctx = static_cast<MemprofThreadContext *>(tctx_base);
+  if (MemprofThread *t = tctx->thread)
+    accumulated_stats->MergeFrom(&t->stats());
+}
+
+static void GetAccumulatedStats(MemprofStats *stats) {
+  stats->Clear();
+  {
+    ThreadRegistryLock l(&memprofThreadRegistry());
+    memprofThreadRegistry().RunCallbackForEachThreadLocked(MergeThreadStats,
+                                                           stats);
+  }
+  stats->MergeFrom(&unknown_thread_stats);
+  {
+    BlockingMutexLock lock(&dead_threads_stats_lock);
+    stats->MergeFrom(&dead_threads_stats);
+  }
+  // This is not very accurate: we may miss allocation peaks that happen
+  // between two updates of accumulated_stats_. For more accurate bookkeeping
+  // the maximum should be updated on every malloc(), which is unacceptable.
+  if (max_malloced_memory < stats->malloced) {
+    max_malloced_memory = stats->malloced;
+  }
+}
+
+void FlushToDeadThreadStats(MemprofStats *stats) {
+  BlockingMutexLock lock(&dead_threads_stats_lock);
+  dead_threads_stats.MergeFrom(stats);
+  stats->Clear();
+}
+
+MemprofStats &GetCurrentThreadStats() {
+  MemprofThread *t = GetCurrentThread();
+  return (t) ? t->stats() : unknown_thread_stats;
+}
+
+static void PrintAccumulatedStats() {
+  MemprofStats stats;
+  GetAccumulatedStats(&stats);
+  // Use lock to keep reports from mixing up.
+  BlockingMutexLock lock(&print_lock);
+  stats.Print();
+  StackDepotStats *stack_depot_stats = StackDepotGetStats();
+  Printf("Stats: StackDepot: %zd ids; %zdM allocated\n",
+         stack_depot_stats->n_uniq_ids, stack_depot_stats->allocated >> 20);
+  PrintInternalAllocatorStats();
+}
+
+} // namespace __memprof
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __memprof;
+
+uptr __sanitizer_get_current_allocated_bytes() {
+  MemprofStats stats;
+  GetAccumulatedStats(&stats);
+  uptr malloced = stats.malloced;
+  uptr freed = stats.freed;
+  // Return sane value if malloced < freed due to racy
+  // way we update accumulated stats.
+  return (malloced > freed) ? malloced - freed : 1;
+}
+
+uptr __sanitizer_get_heap_size() {
+  MemprofStats stats;
+  GetAccumulatedStats(&stats);
+  return stats.mmaped - stats.munmaped;
+}
+
+uptr __sanitizer_get_free_bytes() {
+  MemprofStats stats;
+  GetAccumulatedStats(&stats);
+  uptr total_free = stats.mmaped - stats.munmaped + stats.really_freed;
+  uptr total_used = stats.malloced;
+  // Return sane value if total_free < total_used due to racy
+  // way we update accumulated stats.
+  return (total_free > total_used) ? total_free - total_used : 1;
+}
+
+uptr __sanitizer_get_unmapped_bytes() { return 0; }
+
+void __memprof_print_accumulated_stats() { PrintAccumulatedStats(); }

diff --git a/compiler-rt/lib/memprof/memprof_stats.h b/compiler-rt/lib/memprof/memprof_stats.h
new file mode 100644
index 000000000000..ebdaa1909817
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_stats.h
@@ -0,0 +1,61 @@
+//===-- memprof_stats.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for statistics.
+//===----------------------------------------------------------------------===//
+#ifndef MEMPROF_STATS_H
+#define MEMPROF_STATS_H
+
+#include "memprof_allocator.h"
+#include "memprof_internal.h"
+
+namespace __memprof {
+
+// MemprofStats struct is NOT thread-safe.
+// Each MemprofThread has its own MemprofStats, which are sometimes flushed
+// to the accumulated MemprofStats.
+struct MemprofStats {
+  // MemprofStats must be a struct consisting of uptr fields only.
+  // When merging two MemprofStats structs, we treat them as arrays of uptr.
+  uptr mallocs;
+  uptr malloced;
+  uptr malloced_overhead;
+  uptr frees;
+  uptr freed;
+  uptr real_frees;
+  uptr really_freed;
+  uptr reallocs;
+  uptr realloced;
+  uptr mmaps;
+  uptr mmaped;
+  uptr munmaps;
+  uptr munmaped;
+  uptr malloc_large;
+  uptr malloced_by_size[kNumberOfSizeClasses];
+
+  // Ctor for global MemprofStats (accumulated stats for dead threads).
+  explicit MemprofStats(LinkerInitialized) {}
+  // Creates empty stats.
+  MemprofStats();
+
+  void Print(); // Prints formatted stats to stderr.
+  void Clear();
+  void MergeFrom(const MemprofStats *stats);
+};
+
+// Returns stats for GetCurrentThread(), or stats for fake "unknown thread"
+// if GetCurrentThread() returns 0.
+MemprofStats &GetCurrentThreadStats();
+// Flushes a given stats into accumulated stats of dead threads.
+void FlushToDeadThreadStats(MemprofStats *stats);
+
+} // namespace __memprof
+
+#endif // MEMPROF_STATS_H

diff --git a/compiler-rt/lib/memprof/memprof_thread.cpp b/compiler-rt/lib/memprof/memprof_thread.cpp
new file mode 100644
index 000000000000..1bfff69bf1be
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_thread.cpp
@@ -0,0 +1,220 @@
+//===-- memprof_thread.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Thread-related code.
+//===----------------------------------------------------------------------===//
+#include "memprof_thread.h"
+#include "memprof_allocator.h"
+#include "memprof_interceptors.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_tls_get_addr.h"
+
+namespace __memprof {
+
+// MemprofThreadContext implementation.
+
+void MemprofThreadContext::OnCreated(void *arg) {
+  CreateThreadContextArgs *args = static_cast<CreateThreadContextArgs *>(arg);
+  if (args->stack)
+    stack_id = StackDepotPut(*args->stack);
+  thread = args->thread;
+  thread->set_context(this);
+}
+
+void MemprofThreadContext::OnFinished() {
+  // Drop the link to the MemprofThread object.
+  thread = nullptr;
+}
+
+static ALIGNED(16) char thread_registry_placeholder[sizeof(ThreadRegistry)];
+static ThreadRegistry *memprof_thread_registry;
+
+static BlockingMutex mu_for_thread_context(LINKER_INITIALIZED);
+static LowLevelAllocator allocator_for_thread_context;
+
+static ThreadContextBase *GetMemprofThreadContext(u32 tid) {
+  BlockingMutexLock lock(&mu_for_thread_context);
+  return new (allocator_for_thread_context) MemprofThreadContext(tid);
+}
+
+ThreadRegistry &memprofThreadRegistry() {
+  static bool initialized;
+  // Don't worry about thread_safety - this should be called when there is
+  // a single thread.
+  if (!initialized) {
+    // Never reuse MemProf threads: we store pointer to MemprofThreadContext
+    // in TSD and can't reliably tell when no more TSD destructors will
+    // be called. It would be wrong to reuse MemprofThreadContext for another
+    // thread before all TSD destructors will be called for it.
+    memprof_thread_registry = new (thread_registry_placeholder) ThreadRegistry(
+        GetMemprofThreadContext, kMaxNumberOfThreads, kMaxNumberOfThreads);
+    initialized = true;
+  }
+  return *memprof_thread_registry;
+}
+
+MemprofThreadContext *GetThreadContextByTidLocked(u32 tid) {
+  return static_cast<MemprofThreadContext *>(
+      memprofThreadRegistry().GetThreadLocked(tid));
+}
+
+// MemprofThread implementation.
+
+MemprofThread *MemprofThread::Create(thread_callback_t start_routine, void *arg,
+                                     u32 parent_tid, StackTrace *stack,
+                                     bool detached) {
+  uptr PageSize = GetPageSizeCached();
+  uptr size = RoundUpTo(sizeof(MemprofThread), PageSize);
+  MemprofThread *thread = (MemprofThread *)MmapOrDie(size, __func__);
+  thread->start_routine_ = start_routine;
+  thread->arg_ = arg;
+  MemprofThreadContext::CreateThreadContextArgs args = {thread, stack};
+  memprofThreadRegistry().CreateThread(*reinterpret_cast<uptr *>(thread),
+                                       detached, parent_tid, &args);
+
+  return thread;
+}
+
+void MemprofThread::TSDDtor(void *tsd) {
+  MemprofThreadContext *context = (MemprofThreadContext *)tsd;
+  VReport(1, "T%d TSDDtor\n", context->tid);
+  if (context->thread)
+    context->thread->Destroy();
+}
+
+void MemprofThread::Destroy() {
+  int tid = this->tid();
+  VReport(1, "T%d exited\n", tid);
+
+  malloc_storage().CommitBack();
+  memprofThreadRegistry().FinishThread(tid);
+  FlushToDeadThreadStats(&stats_);
+  uptr size = RoundUpTo(sizeof(MemprofThread), GetPageSizeCached());
+  UnmapOrDie(this, size);
+  DTLS_Destroy();
+}
+
+inline MemprofThread::StackBounds MemprofThread::GetStackBounds() const {
+  if (stack_bottom_ >= stack_top_)
+    return {0, 0};
+  return {stack_bottom_, stack_top_};
+}
+
+uptr MemprofThread::stack_top() { return GetStackBounds().top; }
+
+uptr MemprofThread::stack_bottom() { return GetStackBounds().bottom; }
+
+uptr MemprofThread::stack_size() {
+  const auto bounds = GetStackBounds();
+  return bounds.top - bounds.bottom;
+}
+
+void MemprofThread::Init(const InitOptions *options) {
+  CHECK_EQ(this->stack_size(), 0U);
+  SetThreadStackAndTls(options);
+  if (stack_top_ != stack_bottom_) {
+    CHECK_GT(this->stack_size(), 0U);
+    CHECK(AddrIsInMem(stack_bottom_));
+    CHECK(AddrIsInMem(stack_top_ - 1));
+  }
+  int local = 0;
+  VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
+          (void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
+          &local);
+}
+
+thread_return_t
+MemprofThread::ThreadStart(tid_t os_id,
+                           atomic_uintptr_t *signal_thread_is_registered) {
+  Init();
+  memprofThreadRegistry().StartThread(tid(), os_id, ThreadType::Regular,
+                                      nullptr);
+  if (signal_thread_is_registered)
+    atomic_store(signal_thread_is_registered, 1, memory_order_release);
+
+  if (!start_routine_) {
+    // start_routine_ == 0 if we're on the main thread or on one of the
+    // OS X libdispatch worker threads. But nobody is supposed to call
+    // ThreadStart() for the worker threads.
+    CHECK_EQ(tid(), 0);
+    return 0;
+  }
+
+  return start_routine_(arg_);
+}
+
+MemprofThread *CreateMainThread() {
+  MemprofThread *main_thread = MemprofThread::Create(
+      /* start_routine */ nullptr, /* arg */ nullptr, /* parent_tid */ 0,
+      /* stack */ nullptr, /* detached */ true);
+  SetCurrentThread(main_thread);
+  main_thread->ThreadStart(internal_getpid(),
+                           /* signal_thread_is_registered */ nullptr);
+  return main_thread;
+}
+
+// This implementation doesn't use the argument, which is just passed down
+// from the caller of Init (which see, above).  It's only there to support
+// OS-specific implementations that need more information passed through.
+void MemprofThread::SetThreadStackAndTls(const InitOptions *options) {
+  DCHECK_EQ(options, nullptr);
+  uptr tls_size = 0;
+  uptr stack_size = 0;
+  GetThreadStackAndTls(tid() == 0, &stack_bottom_, &stack_size, &tls_begin_,
+                       &tls_size);
+  stack_top_ = stack_bottom_ + stack_size;
+  tls_end_ = tls_begin_ + tls_size;
+  dtls_ = DTLS_Get();
+
+  if (stack_top_ != stack_bottom_) {
+    int local;
+    CHECK(AddrIsInStack((uptr)&local));
+  }
+}
+
+bool MemprofThread::AddrIsInStack(uptr addr) {
+  const auto bounds = GetStackBounds();
+  return addr >= bounds.bottom && addr < bounds.top;
+}
+
+MemprofThread *GetCurrentThread() {
+  MemprofThreadContext *context =
+      reinterpret_cast<MemprofThreadContext *>(TSDGet());
+  if (!context)
+    return nullptr;
+  return context->thread;
+}
+
+void SetCurrentThread(MemprofThread *t) {
+  CHECK(t->context());
+  VReport(2, "SetCurrentThread: %p for thread %p\n", t->context(),
+          (void *)GetThreadSelf());
+  // Make sure we do not reset the current MemprofThread.
+  CHECK_EQ(0, TSDGet());
+  TSDSet(t->context());
+  CHECK_EQ(t->context(), TSDGet());
+}
+
+u32 GetCurrentTidOrInvalid() {
+  MemprofThread *t = GetCurrentThread();
+  return t ? t->tid() : kInvalidTid;
+}
+
+void EnsureMainThreadIDIsCorrect() {
+  MemprofThreadContext *context =
+      reinterpret_cast<MemprofThreadContext *>(TSDGet());
+  if (context && (context->tid == 0))
+    context->os_id = GetTid();
+}
+} // namespace __memprof

diff --git a/compiler-rt/lib/memprof/memprof_thread.h b/compiler-rt/lib/memprof/memprof_thread.h
new file mode 100644
index 000000000000..4049805a1bec
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_thread.h
@@ -0,0 +1,138 @@
+//===-- memprof_thread.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// MemProf-private header for memprof_thread.cpp.
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMPROF_THREAD_H
+#define MEMPROF_THREAD_H
+
+#include "memprof_allocator.h"
+#include "memprof_internal.h"
+#include "memprof_stats.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_thread_registry.h"
+
+namespace __sanitizer {
+struct DTLS;
+} // namespace __sanitizer
+
+namespace __memprof {
+
+const u32 kInvalidTid = 0xffffff;          // Must fit into 24 bits.
+const u32 kMaxNumberOfThreads = (1 << 22); // 4M
+
+class MemprofThread;
+
+// These objects are created for every thread and are never deleted,
+// so we can find them by tid even if the thread is long dead.
+struct MemprofThreadContext : public ThreadContextBase {
+  explicit MemprofThreadContext(int tid)
+      : ThreadContextBase(tid), announced(false),
+        destructor_iterations(GetPthreadDestructorIterations()), stack_id(0),
+        thread(nullptr) {}
+  bool announced;
+  u8 destructor_iterations;
+  u32 stack_id;
+  MemprofThread *thread;
+
+  void OnCreated(void *arg) override;
+  void OnFinished() override;
+
+  struct CreateThreadContextArgs {
+    MemprofThread *thread;
+    StackTrace *stack;
+  };
+};
+
+// MemprofThreadContext objects are never freed, so we need many of them.
+COMPILER_CHECK(sizeof(MemprofThreadContext) <= 256);
+
+// MemprofThread are stored in TSD and destroyed when the thread dies.
+class MemprofThread {
+public:
+  static MemprofThread *Create(thread_callback_t start_routine, void *arg,
+                               u32 parent_tid, StackTrace *stack,
+                               bool detached);
+  static void TSDDtor(void *tsd);
+  void Destroy();
+
+  struct InitOptions;
+  void Init(const InitOptions *options = nullptr);
+
+  thread_return_t ThreadStart(tid_t os_id,
+                              atomic_uintptr_t *signal_thread_is_registered);
+
+  uptr stack_top();
+  uptr stack_bottom();
+  uptr stack_size();
+  uptr tls_begin() { return tls_begin_; }
+  uptr tls_end() { return tls_end_; }
+  DTLS *dtls() { return dtls_; }
+  u32 tid() { return context_->tid; }
+  MemprofThreadContext *context() { return context_; }
+  void set_context(MemprofThreadContext *context) { context_ = context; }
+
+  bool AddrIsInStack(uptr addr);
+
+  // True is this thread is currently unwinding stack (i.e. collecting a stack
+  // trace). Used to prevent deadlocks on platforms where libc unwinder calls
+  // malloc internally. See PR17116 for more details.
+  bool isUnwinding() const { return unwinding_; }
+  void setUnwinding(bool b) { unwinding_ = b; }
+
+  MemprofThreadLocalMallocStorage &malloc_storage() { return malloc_storage_; }
+  MemprofStats &stats() { return stats_; }
+
+private:
+  // NOTE: There is no MemprofThread constructor. It is allocated
+  // via mmap() and *must* be valid in zero-initialized state.
+
+  void SetThreadStackAndTls(const InitOptions *options);
+
+  struct StackBounds {
+    uptr bottom;
+    uptr top;
+  };
+  StackBounds GetStackBounds() const;
+
+  MemprofThreadContext *context_;
+  thread_callback_t start_routine_;
+  void *arg_;
+
+  uptr stack_top_;
+  uptr stack_bottom_;
+
+  uptr tls_begin_;
+  uptr tls_end_;
+  DTLS *dtls_;
+
+  MemprofThreadLocalMallocStorage malloc_storage_;
+  MemprofStats stats_;
+  bool unwinding_;
+};
+
+// Returns a single instance of registry.
+ThreadRegistry &memprofThreadRegistry();
+
+// Must be called under ThreadRegistryLock.
+MemprofThreadContext *GetThreadContextByTidLocked(u32 tid);
+
+// Get the current thread. May return 0.
+MemprofThread *GetCurrentThread();
+void SetCurrentThread(MemprofThread *t);
+u32 GetCurrentTidOrInvalid();
+
+// Used to handle fork().
+void EnsureMainThreadIDIsCorrect();
+} // namespace __memprof
+
+#endif // MEMPROF_THREAD_H

diff --git a/compiler-rt/lib/memprof/weak_symbols.txt b/compiler-rt/lib/memprof/weak_symbols.txt
new file mode 100644
index 000000000000..bb2dea85574f
--- /dev/null
+++ b/compiler-rt/lib/memprof/weak_symbols.txt
@@ -0,0 +1 @@
+___memprof_default_options

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.cpp
index d74e08010d5d..1c6520819ef9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.cpp
@@ -134,4 +134,12 @@ void NORETURN ReportOutOfMemory(uptr requested_size, const StackTrace *stack) {
   Die();
 }
 
+void NORETURN ReportRssLimitExceeded(const StackTrace *stack) {
+  {
+    ScopedAllocatorErrorReport report("rss-limit-exceeded", stack);
+    Report("ERROR: %s: allocator exceeded the RSS limit\n", SanitizerToolName);
+  }
+  Die();
+}
+
 }  // namespace __sanitizer

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.h
index 0653c365c1cd..6e4e6b135491 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.h
@@ -33,6 +33,7 @@ void NORETURN ReportInvalidPosixMemalignAlignment(uptr alignment,
 void NORETURN ReportAllocationSizeTooBig(uptr user_size, uptr max_size,
                                          const StackTrace *stack);
 void NORETURN ReportOutOfMemory(uptr requested_size, const StackTrace *stack);
+void NORETURN ReportRssLimitExceeded(const StackTrace *stack);
 
 }  // namespace __sanitizer
 

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
index d1412478fd2c..d8e809b06094 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
@@ -82,8 +82,9 @@ COMMON_FLAG(bool, print_summary, true,
             "If false, disable printing error summaries in addition to error "
             "reports.")
 COMMON_FLAG(int, print_module_map, 0,
-            "OS X only (0 - don't print, 1 - print only once before process "
-            "exits, 2 - print after each report).")
+            "Print the process module map where supported (0 - don't print, "
+            "1 - print only once before process exits, 2 - print after each "
+            "report).")
 COMMON_FLAG(bool, check_printf, true, "Check printf arguments.")
 #define COMMON_FLAG_HANDLE_SIGNAL_HELP(signal) \
     "Controls custom tool's " #signal " handler (0 - do not registers the " \

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
index a6c551487052..8b34e54137df 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -448,5 +448,8 @@ using namespace __sanitizer;
 namespace __hwasan {
 using namespace __sanitizer;
 }
+namespace __memprof {
+using namespace __sanitizer;
+}
 
 #endif  // SANITIZER_DEFS_H

diff  --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt
index f0330bcfe304..0836005e10db 100644
--- a/compiler-rt/test/CMakeLists.txt
+++ b/compiler-rt/test/CMakeLists.txt
@@ -68,6 +68,9 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS)
   if(COMPILER_RT_BUILD_PROFILE AND COMPILER_RT_HAS_PROFILE)
     compiler_rt_test_runtime(profile)
   endif()
+  if(COMPILER_RT_BUILD_MEMPROF)
+    compiler_rt_test_runtime(memprof)
+  endif()
   if(COMPILER_RT_BUILD_XRAY)
     compiler_rt_test_runtime(xray)
   endif()

diff  --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 57b26c19e8e2..d955f8829e5d 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -57,6 +57,8 @@
 # If needed, add cflag for shadow scale.
 if config.asan_shadow_scale != '':
   config.target_cflags += " -mllvm -asan-mapping-scale=" + config.asan_shadow_scale
+if config.memprof_shadow_scale != '':
+  config.target_cflags += " -mllvm -memprof-mapping-scale=" + config.memprof_shadow_scale
 
 # BFD linker in 64-bit android toolchains fails to find libc++_shared.so, which
 # is a transitive shared library dependency (via asan runtime).
@@ -554,6 +556,11 @@ def is_windows_lto_supported():
 else:
   config.available_features.add("shadow-scale-3")
 
+if config.memprof_shadow_scale:
+  config.available_features.add("memprof-shadow-scale-%s" % config.memprof_shadow_scale)
+else:
+  config.available_features.add("memprof-shadow-scale-3")
+
 if config.expensive_checks:
   config.available_features.add("expensive_checks")
 

diff  --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in
index 000bf9b98470..92eb8c58ca6b 100644
--- a/compiler-rt/test/lit.common.configured.in
+++ b/compiler-rt/test/lit.common.configured.in
@@ -29,6 +29,7 @@ set_default("compiler_rt_intercept_libdispatch", @COMPILER_RT_INTERCEPT_LIBDISPA
 set_default("compiler_rt_libdir", "@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@")
 set_default("emulator", "@COMPILER_RT_EMULATOR@")
 set_default("asan_shadow_scale", "@COMPILER_RT_ASAN_SHADOW_SCALE@")
+set_default("memprof_shadow_scale", "@COMPILER_RT_MEMPROF_SHADOW_SCALE@")
 set_default("apple_platform", "osx")
 set_default("apple_platform_min_deployment_target_flag", "-mmacosx-version-min")
 set_default("sanitizer_can_use_cxxabi", @SANITIZER_CAN_USE_CXXABI_PYBOOL@)

diff  --git a/compiler-rt/test/memprof/CMakeLists.txt b/compiler-rt/test/memprof/CMakeLists.txt
new file mode 100644
index 000000000000..8a29919b1770
--- /dev/null
+++ b/compiler-rt/test/memprof/CMakeLists.txt
@@ -0,0 +1,60 @@
+set(MEMPROF_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+
+set(MEMPROF_TESTSUITES)
+set(MEMPROF_DYNAMIC_TESTSUITES)
+
+macro(get_bits_for_arch arch bits)
+  if (${arch} MATCHES "x86_64")
+    set(${bits} 64)
+  else()
+    message(FATAL_ERROR "Unexpected target architecture: ${arch}")
+  endif()
+endmacro()
+
+set(MEMPROF_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+if(NOT COMPILER_RT_STANDALONE_BUILD)
+  list(APPEND MEMPROF_TEST_DEPS memprof)
+  if(COMPILER_RT_HAS_LLD AND TARGET lld)
+    list(APPEND MEMPROF_TEST_DEPS lld)
+  endif()
+endif()
+set(MEMPROF_DYNAMIC_TEST_DEPS ${MEMPROF_TEST_DEPS})
+
+set(MEMPROF_TEST_ARCH ${MEMPROF_SUPPORTED_ARCH})
+
+foreach(arch ${MEMPROF_TEST_ARCH})
+  set(MEMPROF_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}-${OS_NAME}" MEMPROF_TEST_CONFIG_SUFFIX)
+  get_bits_for_arch(${arch} MEMPROF_TEST_BITS)
+  get_test_cc_for_arch(${arch} MEMPROF_TEST_TARGET_CC MEMPROF_TEST_TARGET_CFLAGS)
+  set(MEMPROF_TEST_DYNAMIC False)
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py
+    )
+  list(APPEND MEMPROF_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+
+  string(TOLOWER "-${arch}-${OS_NAME}-dynamic" MEMPROF_TEST_CONFIG_SUFFIX)
+  set(MEMPROF_TEST_DYNAMIC True)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py)
+  list(APPEND MEMPROF_DYNAMIC_TESTSUITES
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
+
+add_lit_testsuite(check-memprof "Running the MemProfiler tests"
+  ${MEMPROF_TESTSUITES}
+  DEPENDS ${MEMPROF_TEST_DEPS})
+set_target_properties(check-memprof PROPERTIES FOLDER "Compiler-RT Misc")
+
+add_lit_testsuite(check-memprof-dynamic
+  "Running the MemProfiler tests with dynamic runtime"
+  ${MEMPROF_DYNAMIC_TESTSUITES}
+  ${EXCLUDE_FROM_CHECK_ALL}
+  DEPENDS ${MEMPROF_DYNAMIC_TEST_DEPS})
+set_target_properties(check-memprof-dynamic
+  PROPERTIES FOLDER "Compiler-RT Misc")

diff  --git a/compiler-rt/test/memprof/TestCases/atexit_stats.cpp b/compiler-rt/test/memprof/TestCases/atexit_stats.cpp
new file mode 100644
index 000000000000..0f21ae34189e
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/atexit_stats.cpp
@@ -0,0 +1,20 @@
+// Check atexit option.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=atexit=1 %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=atexit=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOATEXIT
+
+// CHECK: MemProfiler exit stats:
+// CHECK: Stats: {{[0-9]+}}M malloced ({{[0-9]+}}M for overhead) by {{[0-9]+}} calls
+// CHECK: Stats: {{[0-9]+}}M realloced by {{[0-9]+}} calls
+// CHECK: Stats: {{[0-9]+}}M freed by {{[0-9]+}} calls
+// CHECK: Stats: {{[0-9]+}}M really freed by {{[0-9]+}} calls
+// CHECK: Stats: {{[0-9]+}}M ({{[0-9]+}}M-{{[0-9]+}}M) mmaped; {{[0-9]+}} maps, {{[0-9]+}} unmaps
+// CHECK:   mallocs by size class:
+// CHECK: Stats: malloc large: {{[0-9]+}}
+
+// NOATEXIT-NOT: MemProfiler exit stats
+
+int main() {
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/default_options.cpp b/compiler-rt/test/memprof/TestCases/default_options.cpp
new file mode 100644
index 000000000000..1b6b61fc048b
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/default_options.cpp
@@ -0,0 +1,12 @@
+// RUN: %clangxx_memprof -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+const char *kMemProfDefaultOptions = "verbosity=1 help=1";
+
+extern "C" const char *__memprof_default_options() {
+  // CHECK: Available flags for MemProfiler:
+  return kMemProfDefaultOptions;
+}
+
+int main() {
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/dump_process_map.cpp b/compiler-rt/test/memprof/TestCases/dump_process_map.cpp
new file mode 100644
index 000000000000..2b9e98a5af3e
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/dump_process_map.cpp
@@ -0,0 +1,14 @@
+// Check print_module_map option.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_module_map=1 %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=print_module_map=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOMAP
+
+// CHECK: Process memory map follows:
+// CHECK: dump_process_map.cpp.tmp
+// CHECK: End of process memory map.
+// NOMAP-NOT: memory map
+
+int main() {
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/free_hook_realloc.cpp b/compiler-rt/test/memprof/TestCases/free_hook_realloc.cpp
new file mode 100644
index 000000000000..6ed348306a0d
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/free_hook_realloc.cpp
@@ -0,0 +1,33 @@
+// Check that free hook doesn't conflict with Realloc.
+// RUN: %clangxx_memprof -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <sanitizer/allocator_interface.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static void *glob_ptr;
+
+extern "C" {
+void __sanitizer_free_hook(const volatile void *ptr) {
+  if (ptr == glob_ptr) {
+    *(int *)ptr = 0;
+    write(1, "FreeHook\n", sizeof("FreeHook\n"));
+  }
+}
+}
+
+int main() {
+  int *x = (int *)malloc(100);
+  x[0] = 42;
+  glob_ptr = x;
+  int *y = (int *)realloc(x, 200);
+  // Verify that free hook was called and didn't spoil the memory.
+  if (y[0] != 42) {
+    _exit(1);
+  }
+  write(1, "Passed\n", sizeof("Passed\n"));
+  free(y);
+  // CHECK: FreeHook
+  // CHECK: Passed
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/interface_test.cpp b/compiler-rt/test/memprof/TestCases/interface_test.cpp
new file mode 100644
index 000000000000..451ab57b8f64
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/interface_test.cpp
@@ -0,0 +1,25 @@
+// Check that user may include MemProf interface header.
+// Also check that interfaces declared in the sanitizer's allocator_interface
+// are defined for MemProf.
+// RUN: %clang_memprof %s -o %t -DMEMPROF && %run %t
+// RUN: %clang_memprof -x c %s -o %t -DMEMPROF && %run %t
+// RUN: %clang %s -pie -o %t && %run %t
+// RUN: %clang -x c %s -pie -o %t && %run %t
+#include <sanitizer/allocator_interface.h>
+#include <sanitizer/memprof_interface.h>
+#include <stdlib.h>
+
+int main() {
+  int *p = (int *)malloc(10 * sizeof(int));
+#ifdef MEMPROF
+  __sanitizer_get_estimated_allocated_size(8);
+  __sanitizer_get_ownership(p);
+  __sanitizer_get_allocated_size(p);
+  __sanitizer_get_current_allocated_bytes();
+  __sanitizer_get_heap_size();
+  __sanitizer_get_free_bytes();
+  __sanitizer_get_unmapped_bytes();
+  // malloc and free hooks are tested by the malloc_hook.cpp test.
+#endif
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/log_path_test.cpp b/compiler-rt/test/memprof/TestCases/log_path_test.cpp
new file mode 100644
index 000000000000..0b1d4982f11c
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/log_path_test.cpp
@@ -0,0 +1,34 @@
+// The for loop in the backticks below requires bash.
+// REQUIRES: shell
+//
+// RUN: %clangxx_memprof  %s -o %t
+
+// Regular run.
+// RUN: %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-GOOD < %t.out
+
+// Good log_path.
+// RUN: rm -f %t.log.*
+// RUN: %env_memprof_opts=log_path=%t.log %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-GOOD < %t.log.*
+
+// Invalid log_path.
+// RUN: %env_memprof_opts=log_path=/dev/null/INVALID not %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-INVALID < %t.out
+
+// Too long log_path.
+// RUN: %env_memprof_opts=log_path=`for((i=0;i<10000;i++)); do echo -n $i; done` \
+// RUN:   not %run %t 2> %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-LONG < %t.out
+
+#include <stdlib.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  char *x = (char *)malloc(10);
+  memset(x, 0, 10);
+  free(x);
+  return 0;
+}
+// CHECK-GOOD: Memory allocation stack id
+// CHECK-INVALID: ERROR: Can't open file: /dev/null/INVALID
+// CHECK-LONG: ERROR: Path is too long: 01234

diff  --git a/compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp b/compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp
new file mode 100644
index 000000000000..3831d420c376
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp
@@ -0,0 +1,23 @@
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=allocator_may_return_null=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SUMMARY
+// RUN: %env_memprof_opts=allocator_may_return_null=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NULL
+// Test print_summary
+// RUN: %env_memprof_opts=allocator_may_return_null=0:print_summary=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOSUMMARY
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static const size_t kMaxAllowedMallocSizePlusOne = (1ULL << 40) + 1;
+int main() {
+  void *p = malloc(kMaxAllowedMallocSizePlusOne);
+  // CHECK: {{ERROR: MemProfiler: requested allocation size .* exceeds maximum supported size}}
+  // CHECK: {{#0 0x.* in .*malloc}}
+  // CHECK: {{#1 0x.* in main .*malloc-size-too-big.cpp:}}[[@LINE-3]]
+  // CHECK-SUMMARY: SUMMARY: MemProfiler: allocation-size-too-big
+  // CHECK-NOSUMMARY-NOT: SUMMARY:
+
+  printf("malloc returned: %zu\n", (size_t)p);
+  // CHECK-NULL: malloc returned: 0
+
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/malloc_hook.cpp b/compiler-rt/test/memprof/TestCases/malloc_hook.cpp
new file mode 100644
index 000000000000..1c24873ec32d
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/malloc_hook.cpp
@@ -0,0 +1,58 @@
+// Check that MemProf correctly handles malloc and free hooks.
+// RUN: %clangxx_memprof -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <sanitizer/allocator_interface.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+extern "C" {
+const volatile void *global_ptr;
+
+#define WRITE(s) write(1, s, sizeof(s))
+
+// Note: avoid calling functions that allocate memory in malloc/free
+// to avoid infinite recursion.
+void __sanitizer_malloc_hook(const volatile void *ptr, size_t sz) {
+  if (__sanitizer_get_ownership(ptr) && sz == 4) {
+    WRITE("MallocHook\n");
+    global_ptr = ptr;
+  }
+}
+void __sanitizer_free_hook(const volatile void *ptr) {
+  if (__sanitizer_get_ownership(ptr) && ptr == global_ptr)
+    WRITE("FreeHook\n");
+}
+} // extern "C"
+
+volatile int *x;
+
+void MallocHook1(const volatile void *ptr, size_t sz) { WRITE("MH1\n"); }
+void MallocHook2(const volatile void *ptr, size_t sz) { WRITE("MH2\n"); }
+void FreeHook1(const volatile void *ptr) { WRITE("FH1\n"); }
+void FreeHook2(const volatile void *ptr) { WRITE("FH2\n"); }
+// Call this function with uninitialized arguments to poison
+// TLS shadow for function parameters before calling operator
+// new and, eventually, user-provided hook.
+__attribute__((noinline)) void allocate(int *unused1, int *unused2) {
+  x = new int;
+}
+
+int main() {
+  __sanitizer_install_malloc_and_free_hooks(MallocHook1, FreeHook1);
+  __sanitizer_install_malloc_and_free_hooks(MallocHook2, FreeHook2);
+  int *undef1, *undef2;
+  allocate(undef1, undef2);
+  // CHECK: MallocHook
+  // CHECK: MH1
+  // CHECK: MH2
+  // Check that malloc hook was called with correct argument.
+  if (global_ptr != (void *)x) {
+    _exit(1);
+  }
+  *x = 0;
+  delete x;
+  // CHECK: FreeHook
+  // CHECK: FH1
+  // CHECK: FH2
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
new file mode 100644
index 000000000000..54c416b2c3dc
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
@@ -0,0 +1,10 @@
+// Check mem_info_cache_entries option.
+
+// RUN: %clangxx_memprof -O0 %s -o %t && %env_memprof_opts=mem_info_cache_entries=15:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s
+
+// CHECK: Set 14 miss rate: 0 / {{.*}} = 0.00%
+// CHECK-NOT: Set
+
+int main() {
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/memprof_options-help.cpp b/compiler-rt/test/memprof/TestCases/memprof_options-help.cpp
new file mode 100644
index 000000000000..5c71c1edd4e3
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/memprof_options-help.cpp
@@ -0,0 +1,7 @@
+// RUN: %clangxx_memprof -O0 %s -o %t && %env_memprof_opts=help=1 %run %t 2>&1 | FileCheck %s
+
+int main() {
+}
+
+// CHECK: Available flags for MemProfiler:
+// CHECK-DAG: atexit

diff  --git a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
new file mode 100644
index 000000000000..c319a48f0c56
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
@@ -0,0 +1,14 @@
+// Check print_mem_info_cache_miss_rate and
+// print_mem_info_cache_miss_rate_details options.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_mem_info_cache_miss_rate=1 %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s --check-prefix=DETAILS
+
+// CHECK: Overall miss rate: 0 / {{.*}} = 0.00%
+// DETAILS: Set 0 miss rate: 0 / {{.*}} = 0.00%
+// DETAILS: Set 16380 miss rate: 0 / {{.*}} = 0.00%
+
+int main() {
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/realloc.cpp b/compiler-rt/test/memprof/TestCases/realloc.cpp
new file mode 100644
index 000000000000..fa55646a0df9
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/realloc.cpp
@@ -0,0 +1,21 @@
+// RUN: %clangxx_memprof -O0 %s -o %t
+// Default is true (free on realloc to 0 size)
+// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=allocator_frees_and_returns_null_on_realloc_zero=true %run %t 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=allocator_frees_and_returns_null_on_realloc_zero=false %run %t 2>&1 | FileCheck %s --check-prefix=NO-FREE
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  void *p = malloc(42);
+  p = realloc(p, 0);
+  if (p) {
+    // NO-FREE: Allocated something on realloc(p, 0)
+    fprintf(stderr, "Allocated something on realloc(p, 0)\n");
+  } else {
+    // CHECK: realloc(p, 0) returned nullptr
+    fprintf(stderr, "realloc(p, 0) returned nullptr\n");
+  }
+  free(p);
+}

diff  --git a/compiler-rt/test/memprof/TestCases/stress_dtls.c b/compiler-rt/test/memprof/TestCases/stress_dtls.c
new file mode 100644
index 000000000000..c248ebebd99c
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/stress_dtls.c
@@ -0,0 +1,112 @@
+// REQUIRES: memprof-64-bits
+// Stress test dynamic TLS + dlopen + threads.
+//
+// RUN: %clangxx_memprof -x c -DSO_NAME=f0 %s -shared -o %t-f0.so -fPIC
+// RUN: %clangxx_memprof -x c -DSO_NAME=f1 %s -shared -o %t-f1.so -fPIC
+// RUN: %clangxx_memprof -x c -DSO_NAME=f2 %s -shared -o %t-f2.so -fPIC
+// RUN: %clangxx_memprof %s -ldl -pthread -o %t
+// RUN: %run %t 0 3
+// RUN: %run %t 2 3
+// RUN: %env_memprof_opts=verbosity=2 %run %t 10 2 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=verbosity=2:intercept_tls_get_addr=1 %run %t 10 2 2>&1 | FileCheck %s
+// RUN: %env_memprof_opts=verbosity=2:intercept_tls_get_addr=0 %run %t 10 2 2>&1 | FileCheck %s --check-prefix=CHECK0
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 0
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 1
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 2
+// CHECK: __tls_get_addr
+// CHECK: Creating thread 3
+// CHECK: __tls_get_addr
+// Make sure that TLS slots don't leak
+// CHECK-NOT: num_live_dtls 5
+//
+// CHECK0-NOT: __tls_get_addr
+/*
+cc=your-compiler
+
+$cc stress_dtls.c -pthread -ldl
+for((i=0;i<100;i++)); do
+  $cc -fPIC -shared -DSO_NAME=f$i -o a.out-f$i.so stress_dtls.c;
+done
+./a.out 2 4  # <<<<<< 2 threads, 4 libs
+./a.out 3 50 # <<<<<< 3 threads, 50 libs
+*/
+#ifndef SO_NAME
+#define _GNU_SOURCE
+#include <assert.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef void **(*f_t)();
+
+__thread int my_tls;
+
+#define MAX_N_FUNCTIONS 1000
+f_t Functions[MAX_N_FUNCTIONS];
+
+void *PrintStuff(void *unused) {
+  uintptr_t stack;
+  // fprintf(stderr, "STACK: %p TLS: %p SELF: %p\n", &stack, &my_tls,
+  //        (void *)pthread_self());
+  int i;
+  for (i = 0; i < MAX_N_FUNCTIONS; i++) {
+    if (!Functions[i])
+      break;
+    uintptr_t dtls = (uintptr_t)Functions[i]();
+    fprintf(stderr, "  dtls[%03d]: %lx\n", i, dtls);
+    *(long *)dtls = 42; // check that this is writable.
+  }
+  return NULL;
+}
+
+int main(int argc, char *argv[]) {
+  int num_threads = 1;
+  int num_libs = 1;
+  if (argc >= 2)
+    num_threads = atoi(argv[1]);
+  if (argc >= 3)
+    num_libs = atoi(argv[2]);
+  assert(num_libs <= MAX_N_FUNCTIONS);
+
+  int lib;
+  for (lib = 0; lib < num_libs; lib++) {
+    char buf[4096];
+    snprintf(buf, sizeof(buf), "%s-f%d.so", argv[0], lib);
+    void *handle = dlopen(buf, RTLD_LAZY);
+    if (!handle) {
+      fprintf(stderr, "%s\n", dlerror());
+      exit(1);
+    }
+    snprintf(buf, sizeof(buf), "f%d", lib);
+    Functions[lib] = (f_t)dlsym(handle, buf);
+    if (!Functions[lib]) {
+      fprintf(stderr, "%s\n", dlerror());
+      exit(1);
+    }
+    fprintf(stderr, "LIB[%03d] %s: %p\n", lib, buf, Functions[lib]);
+    PrintStuff(0);
+
+    int i;
+    for (i = 0; i < num_threads; i++) {
+      pthread_t t;
+      fprintf(stderr, "Creating thread %d\n", i);
+      pthread_create(&t, 0, PrintStuff, 0);
+      pthread_join(t, 0);
+    }
+  }
+  return 0;
+}
+#else // SO_NAME
+#ifndef DTLS_SIZE
+#define DTLS_SIZE (1 << 17)
+#endif
+__thread void *huge_thread_local_array[DTLS_SIZE];
+void **SO_NAME() {
+  return &huge_thread_local_array[0];
+}
+#endif

diff  --git a/compiler-rt/test/memprof/TestCases/test_malloc_load_store.c b/compiler-rt/test/memprof/TestCases/test_malloc_load_store.c
new file mode 100644
index 000000000000..61c2b420e41a
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_malloc_load_store.c
@@ -0,0 +1,38 @@
+// Check profile with a single malloc call and set of loads and stores. Ensures
+// we get the same profile regardless of whether the memory is deallocated
+// before exit.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_memprof -DFREE -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//  Memory allocation stack id = STACKID
+//   alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// but we need to look for them in the same CHECK to get the correct STACKID.
+// CHECK:  Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// CHECK-NEXT:  access_count (ave/min/max): 20.00 / 20 / 20
+// CHECK-NEXT:  lifetime (ave/min/max): [[AVELIFETIME:[0-9]+]].00 / [[AVELIFETIME]] / [[AVELIFETIME]]
+// CHECK-NEXT:  num migrated: 0, num lifetime overlaps: 0, num same alloc cpu: 0, num same dealloc_cpu: 0
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 {{.*}} in malloc
+// CHECK-NEXT: #1 {{.*}} in main {{.*}}:[[@LINE+6]]
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  int *p = (int *)malloc(10 * sizeof(int));
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+#ifdef FREE
+  free(p);
+#endif
+
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/test_memintrin.cpp b/compiler-rt/test/memprof/TestCases/test_memintrin.cpp
new file mode 100644
index 000000000000..199aa4a70bce
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_memintrin.cpp
@@ -0,0 +1,48 @@
+// Check profile with calls to memory intrinsics.
+
+// RUN: %clangxx_memprof -O0 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//  Memory allocation stack id = STACKIDP
+//   alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+//   access_count (ave/min/max): 3.00 / 3 / 3
+// but we need to look for them in the same CHECK to get the correct STACKIDP.
+// CHECK-DAG:  Memory allocation stack id = [[STACKIDP:[0-9]+]]{{[[:space:]].*}} alloc_count 1, size (ave/min/max) 40.00 / 40 / 40{{[[:space:]].*}} access_count (ave/min/max): 3.00 / 3 / 3
+//
+// This is actually:
+//  Memory allocation stack id = STACKIDQ
+//   alloc_count 1, size (ave/min/max) 20.00 / 20 / 20
+//   access_count (ave/min/max): 2.00 / 2 / 2
+// but we need to look for them in the same CHECK to get the correct STACKIDQ.
+// CHECK-DAG:  Memory allocation stack id = [[STACKIDQ:[0-9]+]]{{[[:space:]].*}} alloc_count 1, size (ave/min/max) 20.00 / 20 / 20{{[[:space:]].*}} access_count (ave/min/max): 2.00 / 2 / 2
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main() {
+  // This is actually:
+  //  Stack for id STACKIDP:
+  //    #0 {{.*}} in operator new
+  //    #1 {{.*}} in main {{.*}}:@LINE+1
+  //  but we need to look for them in the same CHECK-DAG.
+  // CHECK-DAG: Stack for id [[STACKIDP]]:{{[[:space:]].*}} #0 {{.*}} in operator new{{.*[[:space:]].*}} #1 {{.*}} in main {{.*}}:[[@LINE+1]]
+  int *p = new int[10];
+
+  // This is actually:
+  //  Stack for id STACKIDQ:
+  //    #0 {{.*}} in operator new
+  //    #1 {{.*}} in main {{.*}}:@LINE+1
+  //  but we need to look for them in the same CHECK-DAG.
+  // CHECK-DAG: Stack for id [[STACKIDQ]]:{{[[:space:]].*}} #0 {{.*}} in operator new{{.*[[:space:]].*}} #1 {{.*}} in main {{.*}}:[[@LINE+1]]
+  int *q = new int[5];
+
+  memset(p, 1, 10);
+  memcpy(q, p, 5);
+  int x = memcmp(p, q, 5);
+
+  delete[] p;
+  delete[] q;
+
+  return x;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/test_new_load_store.cpp b/compiler-rt/test/memprof/TestCases/test_new_load_store.cpp
new file mode 100644
index 000000000000..d3cb300a0a30
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_new_load_store.cpp
@@ -0,0 +1,42 @@
+// Check profile with a single new call and set of loads and stores. Ensures
+// we get the same profile regardless of whether the memory is deallocated
+// before exit.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_memprof -DFREE -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// Try again with callbacks instead of inline sequences
+// RUN: %clangxx_memprof -mllvm -memprof-use-callbacks -O0 %s -o %t
+// RUN: %env_memprof_opts= %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//  Memory allocation stack id = STACKID
+//   alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// but we need to look for them in the same CHECK to get the correct STACKID.
+// CHECK:  Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 / 40
+// CHECK-NEXT:  access_count (ave/min/max): 20.00 / 20 / 20
+// CHECK-NEXT:  lifetime (ave/min/max): [[AVELIFETIME:[0-9]+]].00 / [[AVELIFETIME]] / [[AVELIFETIME]]
+// CHECK-NEXT:  num migrated: 0, num lifetime overlaps: 0, num same alloc cpu: 0, num same dealloc_cpu: 0
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 {{.*}} in operator new
+// CHECK-NEXT: #1 {{.*}} in main {{.*}}:[[@LINE+6]]
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  int *p = new int[10];
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+#ifdef FREE
+  delete[] p;
+#endif
+
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/test_terse.cpp b/compiler-rt/test/memprof/TestCases/test_terse.cpp
new file mode 100644
index 000000000000..750b49e06bf7
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/test_terse.cpp
@@ -0,0 +1,31 @@
+// Check terse format profile with a single malloc call and set of loads and
+// stores. Ensures we get the same profile regardless of whether the memory is
+// deallocated before exit.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_terse=1 %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_memprof -DFREE -O0 %s -o %t
+// RUN: %env_memprof_opts=print_terse=1 %run %t 2>&1 | FileCheck %s
+
+// CHECK: MIB:[[STACKID:[0-9]+]]/1/40.00/40/40/20.00/20/20/[[AVELIFETIME:[0-9]+]].00/[[AVELIFETIME]]/[[AVELIFETIME]]/0/0/0/0
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 {{.*}} in operator new
+// CHECK-NEXT: #1 {{.*}} in main {{.*}}:[[@LINE+6]]
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  int *p = new int[10];
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+#ifdef FREE
+  delete[] p;
+#endif
+
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp b/compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp
new file mode 100644
index 000000000000..1d25ddf3ad2f
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp
@@ -0,0 +1,30 @@
+// RUN: %clangxx_memprof -O0 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// This is actually:
+//  Memory allocation stack id = STACKID
+//    alloc_count 1, size (ave/min/max) 128.00 / 128 / 128
+// but we need to look for them in the same CHECK to get the correct STACKID.
+// CHECK:      Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 128.00 / 128 / 128
+// CHECK-NEXT:   access_count (ave/min/max): 7.00 / 7 / 7
+
+#include <sanitizer/memprof_interface.h>
+
+#include <stdlib.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  // CHECK:      Stack for id [[STACKID]]:
+  // CHECK-NEXT:     #0 {{.*}} in operator new[](unsigned long)
+  // CHECK-NEXT:     #1 {{.*}} in main {{.*}}:[[@LINE+1]]
+  char *x = new char[128];
+  memset(x, 0xab, 128);
+  __sanitizer_unaligned_load16(x + 15);
+  __sanitizer_unaligned_load32(x + 15);
+  __sanitizer_unaligned_load64(x + 15);
+
+  __sanitizer_unaligned_store16(x + 15, 0);
+  __sanitizer_unaligned_store32(x + 15, 0);
+  __sanitizer_unaligned_store64(x + 15, 0);
+
+  delete[] x;
+  return 0;
+}

diff  --git a/compiler-rt/test/memprof/lit.cfg.py b/compiler-rt/test/memprof/lit.cfg.py
new file mode 100644
index 000000000000..79024b64d46a
--- /dev/null
+++ b/compiler-rt/test/memprof/lit.cfg.py
@@ -0,0 +1,103 @@
+# -*- Python -*-
+
+import os
+import platform
+import re
+
+import lit.formats
+
+# Get shlex.quote if available (added in 3.3), and fall back to pipes.quote if
+# it's not available.
+try:
+  import shlex
+  sh_quote = shlex.quote
+except:
+  import pipes
+  sh_quote = pipes.quote
+
+def get_required_attr(config, attr_name):
+  attr_value = getattr(config, attr_name, None)
+  if attr_value == None:
+    lit_config.fatal(
+      "No attribute %r in test configuration! You may need to run "
+      "tests from your build directory or add this attribute "
+      "to lit.site.cfg.py " % attr_name)
+  return attr_value
+
+# Setup config name.
+config.name = 'MemProfiler' + config.name_suffix
+
+# Platform-specific default MEMPROF_OPTIONS for lit tests.
+default_memprof_opts = list(config.default_sanitizer_opts)
+
+default_memprof_opts_str = ':'.join(default_memprof_opts)
+if default_memprof_opts_str:
+  config.environment['MEMPROF_OPTIONS'] = default_memprof_opts_str
+config.substitutions.append(('%env_memprof_opts=',
+                             'env MEMPROF_OPTIONS=' + default_memprof_opts_str))
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+libdl_flag = '-ldl'
+
+# Setup default compiler flags used with -fmemory-profile option.
+# FIXME: Review the set of required flags and check if it can be reduced.
+target_cflags = [get_required_attr(config, 'target_cflags')]
+target_cxxflags = config.cxx_mode_flags + target_cflags
+clang_memprof_static_cflags = (['-fmemory-profile',
+                            '-mno-omit-leaf-frame-pointer',
+                            '-fno-omit-frame-pointer',
+                            '-fno-optimize-sibling-calls'] +
+                            config.debug_info_flags + target_cflags)
+clang_memprof_static_cxxflags = config.cxx_mode_flags + clang_memprof_static_cflags
+
+memprof_dynamic_flags = []
+if config.memprof_dynamic:
+  memprof_dynamic_flags = ['-shared-libsan']
+  config.available_features.add('memprof-dynamic-runtime')
+else:
+  config.available_features.add('memprof-static-runtime')
+clang_memprof_cflags = clang_memprof_static_cflags + memprof_dynamic_flags
+clang_memprof_cxxflags = clang_memprof_static_cxxflags + memprof_dynamic_flags
+
+def build_invocation(compile_flags):
+  return ' ' + ' '.join([config.clang] + compile_flags) + ' '
+
+config.substitutions.append( ("%clang ", build_invocation(target_cflags)) )
+config.substitutions.append( ("%clangxx ", build_invocation(target_cxxflags)) )
+config.substitutions.append( ("%clang_memprof ", build_invocation(clang_memprof_cflags)) )
+config.substitutions.append( ("%clangxx_memprof ", build_invocation(clang_memprof_cxxflags)) )
+if config.memprof_dynamic:
+  shared_libmemprof_path = os.path.join(config.compiler_rt_libdir, 'libclang_rt.memprof{}.so'.format(config.target_suffix))
+  config.substitutions.append( ("%shared_libmemprof", shared_libmemprof_path) )
+  config.substitutions.append( ("%clang_memprof_static ", build_invocation(clang_memprof_static_cflags)) )
+  config.substitutions.append( ("%clangxx_memprof_static ", build_invocation(clang_memprof_static_cxxflags)) )
+
+# Some tests uses C++11 features such as lambdas and need to pass -std=c++11.
+config.substitutions.append(("%stdcxx11 ", '-std=c++11 '))
+
+config.substitutions.append( ("%libdl", libdl_flag) )
+
+config.available_features.add('memprof-' + config.bits + '-bits')
+
+config.available_features.add('fast-unwinder-works')
+
+# Set LD_LIBRARY_PATH to pick dynamic runtime up properly.
+new_ld_library_path = os.path.pathsep.join(
+  (config.compiler_rt_libdir, config.environment.get('LD_LIBRARY_PATH', '')))
+config.environment['LD_LIBRARY_PATH'] = new_ld_library_path
+
+# Default test suffixes.
+config.suffixes = ['.c', '.cpp']
+
+config.substitutions.append(('%fPIC', '-fPIC'))
+config.substitutions.append(('%fPIE', '-fPIE'))
+config.substitutions.append(('%pie', '-pie'))
+
+# Only run the tests on supported OSs.
+if config.host_os not in ['Linux']:
+  config.unsupported = True
+
+if not config.parallelism_group:
+  config.parallelism_group = 'shadow-memory'

diff  --git a/compiler-rt/test/memprof/lit.site.cfg.py.in b/compiler-rt/test/memprof/lit.site.cfg.py.in
new file mode 100644
index 000000000000..ce7dbb1618f7
--- /dev/null
+++ b/compiler-rt/test/memprof/lit.site.cfg.py.in
@@ -0,0 +1,15 @@
+ at LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+config.name_suffix = "@MEMPROF_TEST_CONFIG_SUFFIX@"
+config.target_cflags = "@MEMPROF_TEST_TARGET_CFLAGS@"
+config.clang = "@MEMPROF_TEST_TARGET_CC@"
+config.bits = "@MEMPROF_TEST_BITS@"
+config.memprof_dynamic = @MEMPROF_TEST_DYNAMIC@
+config.target_arch = "@MEMPROF_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@MEMPROF_LIT_SOURCE_DIR@/lit.cfg.py")


        


More information about the llvm-commits mailing list