[llvm] 17993eb - [Memprof] Adds instrumentation support for memprof with histograms. (#100834)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 16:09:40 PDT 2024
Author: Matthew Weingarten
Date: 2024-07-29T16:09:37-07:00
New Revision: 17993eb162c513fd03753237c5c8229fd629148b
URL: https://github.com/llvm/llvm-project/commit/17993eb162c513fd03753237c5c8229fd629148b
DIFF: https://github.com/llvm/llvm-project/commit/17993eb162c513fd03753237c5c8229fd629148b.diff
LOG: [Memprof] Adds instrumentation support for memprof with histograms. (#100834)
This patch allows running `-fmemory-profile` with histograms
(`-memprof-histogram`) without the flag `-memprof-use-callbacks`, meaning
the `RecordAccessesHistogram` logic is injected into the IR as an inline
sequence of instructions instead of a callback. This significantly
increases the performance of the instrumented binary.
Added:
llvm/test/Instrumentation/HeapProfiler/basic-histogram.ll
Modified:
llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 2c5d749d4a67a..445bf0bb26cc4 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -61,6 +61,9 @@ constexpr int LLVM_MEM_PROFILER_VERSION = 1;
// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;
+// Size of memory mapped to a single histogram bucket.
+constexpr uint64_t HistogramGranularity = 8;
+
// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;
@@ -192,7 +195,7 @@ namespace {
struct ShadowMapping {
ShadowMapping() {
Scale = ClMappingScale;
- Granularity = ClMappingGranularity;
+ Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
Mask = ~(Granularity - 1);
}
@@ -276,6 +279,8 @@ MemProfilerPass::MemProfilerPass() = default;
PreservedAnalyses MemProfilerPass::run(Function &F,
AnalysisManager<Function> &AM) {
+ assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
+ "Memprof with histogram only supports default mapping granularity");
Module &M = *F.getParent();
MemProfiler Profiler(M);
if (Profiler.instrumentFunction(F))
@@ -288,10 +293,6 @@ ModuleMemProfilerPass::ModuleMemProfilerPass() = default;
PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
AnalysisManager<Module> &AM) {
- assert((!ClHistogram || (ClHistogram && ClUseCalls)) &&
- "Cannot use -memprof-histogram without Callbacks. Set "
- "memprof-use-callbacks");
-
ModuleMemProfiler Profiler(M);
if (Profiler.instrumentModule(M))
return PreservedAnalyses::none();
@@ -489,14 +490,21 @@ void MemProfiler::instrumentAddress(Instruction *OrigIns,
return;
}
- // Create an inline sequence to compute shadow location, and increment the
- // value by one.
- Type *ShadowTy = Type::getInt64Ty(*C);
+ Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
- Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
+ // If we are profiling with histograms, add overflow protection at 255.
+ if (ClHistogram) {
+ Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
+ Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
+ Instruction *IncBlock =
+ SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
+ IRB.SetInsertPoint(IncBlock);
+ }
+ Value *Inc = ConstantInt::get(ShadowTy, 1);
ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
IRB.CreateStore(ShadowValue, ShadowAddr);
}
diff --git a/llvm/test/Instrumentation/HeapProfiler/basic-histogram.ll b/llvm/test/Instrumentation/HeapProfiler/basic-histogram.ll
new file mode 100644
index 0000000000000..c7ff129e5b4c4
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/basic-histogram.ll
@@ -0,0 +1,57 @@
+; Test basic memory profiler instrumentation with histograms.
+;
+; RUN: opt < %s -passes='function(memprof),memprof-module' -memprof-histogram -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @llvm.used = appending global [1 x ptr] [ptr @memprof.module_ctor]
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @memprof.module_ctor, ptr null }]
+
+define i32 @test_load(ptr %a) {
+entry:
+ %tmp1 = load i32, ptr %a, align 4
+ ret i32 %tmp1
+}
+; CHECK-LABEL: @test_load
+; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, ptr @__memprof_shadow_memory_dynamic_address
+; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint ptr %a to i64
+; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[LOAD_ADDR]], -8
+; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
+; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
+; CHECK-NEXT: %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK-NEXT: %[[LOAD_SHADOW:[^ ]*]] = load i8, ptr %[[LOAD_SHADOW_PTR]]
+; CHECK-NEXT: %[[ICMP_MAX_COUNT:[^ ]*]] = icmp ult i8 %[[LOAD_SHADOW]], -1
+; CHECK-NEXT: br i1 %[[ICMP_MAX_COUNT]], label %[[INC_LABEL:[^ ]*]], label %[[ELSE_LABEL:[^ ]*]]
+; CHECK: [[INC_LABEL]]:
+; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i8 %[[LOAD_SHADOW]], 1
+; CHECK-NEXT: store i8 %[[NEW_SHADOW]], ptr %[[LOAD_SHADOW_PTR]]
+; CHECK-NEXT: br label %[[ELSE_LABEL]]
+; The actual load.
+; CHECK: [[ELSE_LABEL]]:
+; CHECK-NEXT: %tmp1 = load i32, ptr %a
+; CHECK-NEXT: ret i32 %tmp1
+
+define void @test_store(ptr %a) {
+entry:
+ store i32 42, ptr %a, align 4
+ ret void
+}
+; CHECK-LABEL: @test_store
+; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, ptr @__memprof_shadow_memory_dynamic_address
+; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint ptr %a to i64
+; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[LOAD_ADDR]], -8
+; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
+; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
+; CHECK-NEXT: %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK-NEXT: %[[STORE_SHADOW:[^ ]*]] = load i8, ptr %[[STORE_SHADOW_PTR]]
+; CHECK-NEXT: %[[ICMP_MAX_COUNT:[^ ]*]] = icmp ult i8 %[[STORE_SHADOW]], -1
+; CHECK-NEXT: br i1 %[[ICMP_MAX_COUNT]], label %[[INC_LABEL:[^ ]*]], label %[[ELSE_LABEL:[^ ]*]]
+; CHECK: [[INC_LABEL]]:
+; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i8 %[[STORE_SHADOW]], 1
+; CHECK-NEXT: store i8 %[[NEW_SHADOW]], ptr %[[STORE_SHADOW_PTR]]
+; CHECK-NEXT: br label %[[ELSE_LABEL]]
+; The actual store.
+; CHECK: [[ELSE_LABEL]]:
+; CHECK-NEXT: store i32 42, ptr %a, align 4
+; CHECK-NEXT: ret void
\ No newline at end of file
More information about the llvm-commits mailing list