[llvm] [MemProf][PGO] Prevent dropping of profile metadata during optimization (PR #121359)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 09:46:05 PST 2025
https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/121359
>From 3e226cb0b8b2d5bcf2146bf73ada02e2cecba287 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Mon, 30 Dec 2024 13:02:07 -0800
Subject: [PATCH 1/3] [MemProf][PGO] Prevent dropping of profile metadata
during optimization
This patch fixes a couple of places where memprof-related metadata
(!memprof and !callsite) were being dropped, and one place where PGO
metadata (!prof) was being dropped.
All were due to instances of combineMetadata() being invoked. That
function drops all metadata not in the list provided by the client, and
also drops any not in its switch statement.
Memprof metadata needed a case in the combineMetadata switch statement.
For now we simply keep the metadata of the instruction being kept, which
doesn't retain all the profile information when two calls with
memprof metadata are being combined, but at least retains some.
For the memprof metadata being dropped during call CSE, add memprof and
callsite metadata to the list of known ids in combineMetadataForCSE.
Neither memprof nor regular prof metadata were in the list of known ids
for the callsite in MemCpyOptimizer, which was added to combine AA
metadata after optimization of byval arguments fed by memcpy
instructions, and similar types of optimizations of memcpy uses.
There is one other callsite of combineMetadata, but it is only invoked
on load instructions, which do not carry these types of metadata.
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 9 ++--
llvm/lib/Transforms/Utils/Local.cpp | 8 ++-
llvm/test/Transforms/MemCpyOpt/memcpy.ll | 18 +++++++
.../SimplifyCFG/merge-calls-memprof.ll | 51 +++++++++++++++++++
4 files changed, 81 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index bb98b3d1c07259..c04258da912bb2 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -345,10 +345,11 @@ static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
- unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_invariant_group,
- LLVMContext::MD_access_group};
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_invariant_group,
+ LLVMContext::MD_access_group, LLVMContext::MD_prof,
+ LLVMContext::MD_memprof, LLVMContext::MD_callsite};
combineMetadata(ReplInst, I, KnownIDs, true);
}
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index a3af96d5af026d..ecea00a55f0c51 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3379,6 +3379,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
+ case LLVMContext::MD_memprof:
+ case LLVMContext::MD_callsite:
+ // Preserve !memprof and !callsite metadata on K.
+ break;
case LLVMContext::MD_preserve_access_index:
// Preserve !preserve.access.index in K.
break;
@@ -3442,7 +3446,9 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
LLVMContext::MD_nontemporal,
LLVMContext::MD_noundef,
LLVMContext::MD_mmra,
- LLVMContext::MD_noalias_addrspace};
+ LLVMContext::MD_noalias_addrspace,
+ LLVMContext::MD_memprof,
+ LLVMContext::MD_callsite};
combineMetadata(K, J, KnownIDs, KDominatesJ);
}
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 39b90adc74ef38..65d78f4199aa02 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -803,6 +803,19 @@ define void @byval_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
ret void
}
+define void @byval_param_profile_metadata(ptr align 4 byval(i32) %ptr) {
+; CHECK-LABEL: @byval_param_profile_metadata(
+; CHECK-NEXT: store i32 1, ptr [[PTR2:%.*]], align 4
+; CHECK-NEXT: call void @f_byval(ptr byval(i32) align 4 [[PTR2]]), !prof [[PROF3:![0-9]+]], !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i32, align 4
+ store i32 1, ptr %ptr
+ call void @llvm.memcpy.p0.p0.i64(ptr align 4 %tmp, ptr align 4 %ptr, i64 4, i1 false)
+ call void @f_byval(ptr align 4 byval(i32) %tmp), !memprof !3, !callsite !6, !prof !7
+ ret void
+}
+
define void @memcpy_memory_none(ptr %p, ptr %p2, i64 %size) {
; CHECK-LABEL: @memcpy_memory_none(
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P:%.*]], ptr [[P2:%.*]], i64 [[SIZE:%.*]], i1 false) #[[ATTR7:[0-9]+]]
@@ -897,3 +910,8 @@ define void @memcpy_immut_escape_after(ptr align 4 noalias %val) {
!0 = !{!0}
!1 = !{!1, !0}
!2 = !{!1}
+!3 = !{!4}
+!4 = !{!5, !"cold"}
+!5 = !{i64 123, i64 456}
+!6 = !{i64 123}
+!7 = !{!"branch_weights", i32 10}
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
new file mode 100644
index 00000000000000..10c6aeb26ba767
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+;; Test to ensure that memprof related metadata is not dropped when
+;; instructions are combined. Currently the metadata from the first instruction
+;; is kept, which prevents full loss of profile context information.
+
+; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local noundef nonnull ptr @_Z4testb(i1 noundef zeroext %b) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z4testb(
+; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
+; CHECK-NEXT: ret ptr [[CALL]]
+;
+entry:
+ br i1 %b, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+ ret ptr %x.0
+}
+
+
+declare ptr @_Znwm(i64) nounwind readonly
+
+!0 = !{!1}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
+!3 = !{i64 -852997907418798798}
+!4 = !{!5}
+!5 = !{!6, !"cold"}
+!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
+!7 = !{i64 123}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
+; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
+; CHECK: [[META3]] = !{i64 -852997907418798798}
+;.
>From 133e8d73dab1577604d081c20554bf4ad50a12b6 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 2 Jan 2025 09:24:25 -0800
Subject: [PATCH 2/3] Add FIXME comments with reference to new issue, and
invoke newly created metadata merge functions for memprof and callsite
metadata.
---
llvm/include/llvm/IR/Metadata.h | 2 ++
llvm/include/llvm/Transforms/Utils/Local.h | 5 +++++
llvm/lib/Analysis/MemoryProfileInfo.cpp | 21 +++++++++++++++++++
.../Transforms/InstCombine/InstCombinePHI.cpp | 3 +++
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 +++
llvm/lib/Transforms/Utils/Local.cpp | 11 +++++++++-
6 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 35580f3f38c615..df2384c5f6e69a 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -1464,6 +1464,8 @@ class MDNode : public Metadata {
static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B,
const Instruction *AInstr,
const Instruction *BInstr);
+ static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B);
+ static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
};
/// Tuple of metadata.
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index bbf29e6f46b47b..40c448593807bb 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -412,6 +412,11 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
MemorySSAUpdater *MSSAU = nullptr);
+/// DO NOT CALL EXTERNALLY.
+/// FIXME: https://github.com/llvm/llvm-project/issues/121495
+/// Once external callers of this function are removed, either inline into
+/// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
+///
/// Combine the metadata of two instructions so that K can replace J. Some
/// metadata kinds can only be kept if K does not move, meaning it dominated
/// J in the original IR.
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 1c3f589e849419..39b536886b6f0c 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -347,3 +347,24 @@ template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
return mdconst::dyn_extract<ConstantInt>(N->operands().back())
->getZExtValue();
}
+
+MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
+ if (!(A && B)) {
+ return A ? A : B;
+ }
+
+ // TODO: Support more sophisticated merging, such as selecting the one with
+ // more bytes allocated, or implement support for carrying multiple allocation
+ // leaf contexts. For now, keep the first one.
+ return A;
+}
+
+MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
+ if (!(A && B)) {
+ return A ? A : B;
+ }
+
+ // TODO: Support more sophisticated merging, which will require support for
+ // carrying multiple contexts. For now, keep the first one.
+ return A;
+}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 1fcf1c570addac..272a1942c33509 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -788,6 +788,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
BasicBlock *BB = std::get<0>(Incoming);
Value *V = std::get<1>(Incoming);
LoadInst *LI = cast<LoadInst>(V);
+ // FIXME: https://github.com/llvm/llvm-project/issues/121495
+ // Call combineMetadataForCSE instead, so that an explicit set of KnownIDs
+ // doesn't need to be maintained here.
combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index c04258da912bb2..5f7cb92d239bc1 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -350,6 +350,9 @@ static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
LLVMContext::MD_noalias, LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group, LLVMContext::MD_prof,
LLVMContext::MD_memprof, LLVMContext::MD_callsite};
+ // FIXME: https://github.com/llvm/llvm-project/issues/121495
+ // Use custom AA metadata combining handling instead of combineMetadata, which
+ // is meant for CSE and will drop any metadata not in the KnownIDs list.
combineMetadata(ReplInst, I, KnownIDs, true);
}
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index ecea00a55f0c51..1e4061cb0771e5 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3308,6 +3308,9 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
return Changed;
}
+// FIXME: https://github.com/llvm/llvm-project/issues/121495
+// Once external callers of this function are removed, either inline into
+// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
void llvm::combineMetadata(Instruction *K, const Instruction *J,
ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
@@ -3320,6 +3323,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
switch (Kind) {
default:
+ // FIXME: https://github.com/llvm/llvm-project/issues/121495
+ // Change to removing only explicitly listed other metadata, and assert
+ // on unknown metadata, to avoid inadvertently dropping newly added
+ // metadata types.
K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
@@ -3380,8 +3387,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
case LLVMContext::MD_memprof:
+ K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD));
+ break;
case LLVMContext::MD_callsite:
- // Preserve !memprof and !callsite metadata on K.
+ K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD));
break;
case LLVMContext::MD_preserve_access_index:
// Preserve !preserve.access.index in K.
>From 11ac81d4f46e0f5af67d56f27b54e3561bdb3dfd Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 2 Jan 2025 09:45:43 -0800
Subject: [PATCH 3/3] Simplify merging code
---
llvm/lib/Analysis/MemoryProfileInfo.cpp | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 39b536886b6f0c..2f3c87a89f9f98 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -349,22 +349,18 @@ template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
}
MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
- if (!(A && B)) {
- return A ? A : B;
- }
-
// TODO: Support more sophisticated merging, such as selecting the one with
// more bytes allocated, or implement support for carrying multiple allocation
// leaf contexts. For now, keep the first one.
- return A;
+ if (A)
+ return A;
+ return B;
}
MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
- if (!(A && B)) {
- return A ? A : B;
- }
-
// TODO: Support more sophisticated merging, which will require support for
// carrying multiple contexts. For now, keep the first one.
- return A;
+ if (A)
+ return A;
+ return B;
}
More information about the llvm-commits
mailing list