[llvm] [IR] Composable and Extensible Memory Cache Control Hints (PR #181612)
Fei Peng via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 16 00:49:20 PST 2026
https://github.com/fiigii created https://github.com/llvm/llvm-project/pull/181612
Add target-agnostic infrastructure for the !mem.cache_hint metadata kind. See the RFC: https://discourse.llvm.org/t/rfc-composable-and-extensible-memory-cache-control-hints-in-llvm-ir/89443
This patch includes:
- Registration of mem.cache_hint in FixedMetadataKinds (MD ID 48)
- IR Verifier validation of structural constraints
- Metadata helper support in combineMetadata(), copyMetadataForLoad(), and dropUBImplyingAttrsAndMetadata()
- LangRef documentation for the metadata format and semantics
- Verifier and transform pass test coverage (GVN, InstCombine, SimplifyCFG)
Assisted-by: Claude Code
>From 113d96e2370d376ce8b88507d00f0ec488ca3e3e Mon Sep 17 00:00:00 2001
From: Fei Peng <feip at nvidia.com>
Date: Mon, 16 Feb 2026 00:48:41 -0800
Subject: [PATCH] [IR] Composable and Extensible Memory Cache Control Hints
Add target-agnostic infrastructure for the !mem.cache_hint metadata kind. See the RFC: https://discourse.llvm.org/t/rfc-composable-and-extensible-memory-cache-control-hints-in-llvm-ir/89443
This patch includes:
- Registration of mem.cache_hint in FixedMetadataKinds (MD ID 48)
- IR Verifier validation of structural constraints
- Metadata helper support in combineMetadata(), copyMetadataForLoad(),
and dropUBImplyingAttrsAndMetadata()
- LangRef documentation for the metadata format and semantics
- Verifier and transform pass test coverage (GVN, InstCombine,
SimplifyCFG)
Assisted-by: Claude Code
---
llvm/docs/LangRef.rst | 66 +++++++++++++++++
llvm/include/llvm/IR/FixedMetadataKinds.def | 1 +
llvm/lib/IR/Instruction.cpp | 8 ++-
llvm/lib/IR/Verifier.cpp | 70 +++++++++++++++++++
llvm/lib/Transforms/Utils/Local.cpp | 6 ++
llvm/test/Transforms/GVN/mem-cache-hint.ll | 42 +++++++++++
.../Transforms/InstCombine/mem-cache-hint.ll | 15 ++++
.../Transforms/SimplifyCFG/mem-cache-hint.ll | 25 +++++++
llvm/test/Verifier/mem-cache-hint.ll | 68 ++++++++++++++++++
9 files changed, 298 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/GVN/mem-cache-hint.ll
create mode 100644 llvm/test/Transforms/InstCombine/mem-cache-hint.ll
create mode 100644 llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
create mode 100644 llvm/test/Verifier/mem-cache-hint.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 50a2515f69189..50d16eee64e87 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7690,6 +7690,72 @@ The ``!captures`` attribute makes no statement about other uses of ``%x``, or
uses of the stored-to memory location after it has been overwritten with a
different value.
+'``mem.cache_hint``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``!mem.cache_hint`` metadata may be attached to any instruction that reads
+or writes memory. It provides target-specific cache control hints for the
+memory operation. This metadata is a performance hint: dropping or ignoring it
+must not change the observable behavior of the program.
+
+The value of ``!mem.cache_hint`` is a single metadata node containing a flat
+list of ``(operand_no, hint_node)`` pairs. Each ``operand_no`` is an ``i32``
+constant identifying a memory-object operand (not a raw IR operand). Each
+``hint_node`` is a metadata node containing target-prefixed key/value string
+pairs.
+
+The ``!mem.cache_hint`` node must contain an even number of entries, alternating
+``i32`` operand numbers and metadata nodes. Operand numbers must be unique within
+a ``!mem.cache_hint`` node and must be valid for the instruction. Keys within a
+single hint node must also be unique.
+
+Most instructions have a single memory-object operand (``operand_no = 0``).
+Copy-like instructions such as ``llvm.memcpy`` conceptually access multiple
+memory objects: e.g., destination is ``operand_no = 0`` and source is
+``operand_no = 1``.
+
+The hint node keys are prefixed with a target identifier (e.g., ``nvvm.``) and
+their interpretation is entirely target-dependent. The IR verifier enforces only
+the structural rules above; validation of target-specific keys and values is
+performed by the corresponding backend. Unsupported properties may be silently
+ignored during code generation.
+
+The following examples use ``nvvm.`` prefixed keys for NVIDIA GPU targets.
+Other targets may define their own prefixed keys.
+
+Example: load with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+ %v = load i32, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+ !0 = !{ i32 0, !1 }
+ !1 = !{ !"nvvm.l1_eviction", !"first",
+ !"nvvm.l2_eviction", !"first",
+ !"nvvm.l2_prefetch_size", !"128B" }
+
+Example: store with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+ store i32 %v, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+ !0 = !{ i32 0, !1 }
+ !1 = !{ !"nvvm.l1_eviction", !"last",
+ !"nvvm.l2_eviction", !"last" }
+
+Example: memcpy with per-operand hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %d, ptr addrspace(1) %s,
+ i64 16, i1 false), !mem.cache_hint !0
+
+ !0 = !{ i32 0, !1, i32 1, !2 }
+ !1 = !{ !"nvvm.l1_eviction", !"last" }
+ !2 = !{ !"nvvm.l1_eviction", !"first",
+ !"nvvm.l2_prefetch_size", !"128B" }
+
.. _llvm.loop:
'``llvm.loop``'
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index 98129985714b2..fa71e29366d0c 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -59,3 +59,4 @@ LLVM_FIXED_MD_KIND(MD_captures, "captures", 44)
LLVM_FIXED_MD_KIND(MD_alloc_token, "alloc_token", 45)
LLVM_FIXED_MD_KIND(MD_implicit_ref, "implicit.ref", 46)
LLVM_FIXED_MD_KIND(MD_nofpclass, "nofpclass", 47)
+LLVM_FIXED_MD_KIND(MD_mem_cache_hint, "mem.cache_hint", 48)
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 67fad4028df5f..4abaa7711c8e6 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -573,12 +573,14 @@ void Instruction::dropUBImplyingAttrsAndMetadata(ArrayRef<unsigned> Keep) {
// !annotation and !prof metadata does not impact semantics.
// !range, !nonnull and !align produce poison, so they are safe to speculate.
// !fpmath specifies floating-point precision and does not imply UB.
+ // !mem.cache_hint is a performance hint and does not imply UB.
// !noundef and various AA metadata must be dropped, as it generally produces
// immediate undefined behavior.
static const unsigned KnownIDs[] = {
- LLVMContext::MD_annotation, LLVMContext::MD_range,
- LLVMContext::MD_nonnull, LLVMContext::MD_align,
- LLVMContext::MD_fpmath, LLVMContext::MD_prof};
+ LLVMContext::MD_annotation, LLVMContext::MD_range,
+ LLVMContext::MD_nonnull, LLVMContext::MD_align,
+ LLVMContext::MD_fpmath, LLVMContext::MD_prof,
+ LLVMContext::MD_mem_cache_hint};
SmallVector<unsigned> KeepIDs;
KeepIDs.reserve(Keep.size() + std::size(KnownIDs));
append_range(KeepIDs, (!ProfcheckDisableMetadataFixes ? KnownIDs
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index f986f5406b2b3..8ad0874777135 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -546,6 +546,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
void visitAccessGroupMetadata(const MDNode *MD);
void visitCapturesMetadata(Instruction &I, const MDNode *Captures);
void visitAllocTokenMetadata(Instruction &I, MDNode *MD);
+ void visitMemCacheHintMetadata(Instruction &I, MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -5576,6 +5577,72 @@ void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) {
"expected integer constant", MD);
}
+/// Verify the structural rules of a !mem.cache_hint attachment \p MD on \p I.
+///
+/// \p I must be an instruction that may read or write memory, and \p MD must
+/// be a flat list of (operand_no, hint_node) pairs:
+///  - operand_no is a non-negative integer constant, unique within \p MD, and
+///    smaller than the number of pointer-typed (or pointer-vector-typed) call
+///    arguments for calls, or instruction operands otherwise;
+///  - hint_node is a metadata node with an even number of operands whose
+///    even-indexed operands (the keys) are unique strings.
+/// Hint values are target-specific and are not inspected here.
+void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
+  Check(I.mayReadOrWriteMemory(),
+        "!mem.cache_hint is only valid on memory operations", &I);
+
+  Check(MD->getNumOperands() % 2 == 0,
+        "!mem.cache_hint must have even number of operands "
+        "(operand_no, hint_node pairs)",
+        MD);
+
+  // A value counts as a memory-object operand if it is a pointer or a vector
+  // of pointers.
+  auto IsMemoryObjectOperand = [](const Value *V) {
+    return V->getType()->isPtrOrPtrVectorTy();
+  };
+
+  // For calls only the call arguments are counted; for every other
+  // instruction all operands are counted.
+  unsigned NumMemoryObjectOperands = 0;
+  if (const auto *CB = dyn_cast<CallBase>(&I))
+    NumMemoryObjectOperands = count_if(CB->args(), [&](const Use &Arg) {
+      return IsMemoryObjectOperand(Arg.get());
+    });
+  else
+    NumMemoryObjectOperands = count_if(I.operands(), [&](const Use &Op) {
+      return IsMemoryObjectOperand(Op.get());
+    });
+
+  SmallVector<unsigned, 4> SeenOperandNos;
+
+  // Top-level metadata alternates: i32 operand_no, MDNode hint_node.
+  for (unsigned i = 0; i + 1 < MD->getNumOperands(); i += 2) {
+    auto *OpNoCI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
+    Check(OpNoCI,
+          "!mem.cache_hint operand_no must be an integer constant in pair", MD);
+
+    // Validate the range on the APInt itself. The constant is not required to
+    // be 64 bits or narrower at this point, and APInt::getZExtValue() asserts
+    // on wider values (or yields only the low word in builds without
+    // assertions, which would let e.g. i128 2^64 pass as operand 0).
+    const APInt &OpNoVal = OpNoCI->getValue();
+    Check(OpNoVal.isNonNegative(),
+          "!mem.cache_hint operand_no must be non-negative", MD);
+
+    Check(OpNoVal.ult(NumMemoryObjectOperands),
+          "!mem.cache_hint operand_no must refer to a valid memory object "
+          "operand",
+          &I);
+
+    // Lossless: the value is known to be below an unsigned count.
+    unsigned OperandNo = static_cast<unsigned>(OpNoVal.getZExtValue());
+    Check(!is_contained(SeenOperandNos, OperandNo),
+          "!mem.cache_hint contains duplicate operand_no", MD);
+    SeenOperandNos.push_back(OperandNo);
+
+    const auto *Node = dyn_cast<MDNode>(MD->getOperand(i + 1));
+    Check(Node, "!mem.cache_hint hint node must be a metadata node", MD);
+
+    Check(Node->getNumOperands() % 2 == 0,
+          "!mem.cache_hint hint node must have even number of operands "
+          "(key-value pairs)",
+          Node);
+
+    // Keys sit at even indices of the hint node; the operand after each key is
+    // its value.
+    SmallVector<StringRef, 8> SeenKeys;
+    for (unsigned j = 0; j + 1 < Node->getNumOperands(); j += 2) {
+      const auto *Key = dyn_cast<MDString>(Node->getOperand(j));
+      Check(Key, "!mem.cache_hint key must be a string", Node);
+
+      StringRef KeyStr = Key->getString();
+      Check(!is_contained(SeenKeys, KeyStr),
+            "!mem.cache_hint hint node contains duplicate key", Node);
+      SeenKeys.push_back(KeyStr);
+      // Values are target-specific and not validated here.
+    }
+  }
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -5808,6 +5875,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_alloc_token))
visitAllocTokenMetadata(I, MD);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_mem_cache_hint))
+ visitMemCacheHintMetadata(I, MD);
+
if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
visitMDNode(*N, AreDebugLocsAllowed::Yes);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index bd617cd003a76..4b190d5b9f30a 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3038,6 +3038,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
if (!AAOnly)
K->setMetadata(Kind, JMD);
break;
+ case LLVMContext::MD_mem_cache_hint:
+ // Preserve !mem.cache_hint if it is present on both instructions.
+ if (!AAOnly)
+ K->setMetadata(Kind, JMD);
+ break;
case LLVMContext::MD_noalias_addrspace:
if (DoesKMove)
K->setMetadata(Kind,
@@ -3148,6 +3153,7 @@ void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
case LLVMContext::MD_alias_scope:
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_cache_hint:
case LLVMContext::MD_mem_parallel_loop_access:
case LLVMContext::MD_access_group:
case LLVMContext::MD_noundef:
diff --git a/llvm/test/Transforms/GVN/mem-cache-hint.ll b/llvm/test/Transforms/GVN/mem-cache-hint.ll
new file mode 100644
index 0000000000000..2bac96e2a8e8c
--- /dev/null
+++ b/llvm/test/Transforms/GVN/mem-cache-hint.ll
@@ -0,0 +1,42 @@
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+; Test that combineMetadata() preserves !mem.cache_hint only when present on
+; both instructions being merged.
+
+; Both loads have !mem.cache_hint → preserved after GVN deduplication.
+; CHECK-LABEL: @both_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @both_hint(ptr %p) {
+ %a = load i64, ptr %p, !mem.cache_hint !0
+ %b = load i64, ptr %p, !mem.cache_hint !0
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; Only one load has !mem.cache_hint → dropped after GVN deduplication.
+; CHECK-LABEL: @one_hint
+; CHECK: load
+; CHECK-NOT: !mem.cache_hint
+define i64 @one_hint(ptr %p) {
+ %a = load i64, ptr %p
+ %b = load i64, ptr %p, !mem.cache_hint !0
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; Both loads have !mem.cache_hint, but with different payloads.
+; Which payload survives the merge is currently unspecified.
+; TODO: delegate to TTI to let targets decide how to merge differing payloads.
+; CHECK-LABEL: @diff_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @diff_hint(ptr %p) {
+ %a = load i64, ptr %p, !mem.cache_hint !0
+ %b = load i64, ptr %p, !mem.cache_hint !2
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
+!2 = !{ i32 0, !3 }
+!3 = !{ !"nvvm.l1_eviction", !"last" }
diff --git a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
new file mode 100644
index 0000000000000..771a74de9b15a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
@@ -0,0 +1,15 @@
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+; Test that copyMetadataForLoad() preserves !mem.cache_hint when a load is
+; transformed (e.g., load+bitcast folded into a single load).
+
+; CHECK-LABEL: @cast_load_preserve_hint
+; CHECK: load i32, ptr %p{{.*}} !mem.cache_hint
+define i32 @cast_load_preserve_hint(ptr %p) {
+ %l = load float, ptr %p, !mem.cache_hint !0
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
new file mode 100644
index 0000000000000..81bd0f8c12f2a
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
@@ -0,0 +1,25 @@
+; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
+
+; Test that SimplifyCFG speculates the conditional load and that
+; dropUBImplyingAttrsAndMetadata() keeps !mem.cache_hint on the
+; speculated load. mem.cache_hint is a performance hint and does not
+; imply UB, so it is safe to preserve.
+
+; CHECK-LABEL: @speculate_keeps_hint
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr %p{{.*}} !mem.cache_hint
+; CHECK-NEXT: [[SEL:%.*]] = select i1 %c, i32 [[V]], i32 0
+; CHECK-NEXT: ret i32 [[SEL]]
+define i32 @speculate_keeps_hint(i1 %c, ptr dereferenceable(4) align 4 %p) {
+entry:
+ br i1 %c, label %if, label %join
+if:
+ %v = load i32, ptr %p, !mem.cache_hint !0
+ br label %join
+join:
+ %phi = phi i32 [ %v, %if ], [ 0, %entry ]
+ ret i32 %phi
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Verifier/mem-cache-hint.ll b/llvm/test/Verifier/mem-cache-hint.ll
new file mode 100644
index 0000000000000..5b6af20682b20
--- /dev/null
+++ b/llvm/test/Verifier/mem-cache-hint.ll
@@ -0,0 +1,68 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+declare void @foo(i32, i32)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly, ptr noalias readonly, i64, i1 immarg)
+
+; CHECK: !mem.cache_hint is only valid on memory operations
+define void @non_memory_op(i32 %x, i32 %y) {
+ %z = add i32 %x, %y, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint must have even number of operands
+define void @odd_top_level_operands(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0}
+ ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must be an integer constant in pair
+define void @operand_no_not_integer(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{!"zero", !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_not_pointer(i32 %x, i32 %y) {
+ call void @foo(i32 %x, i32 %y), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_out_of_range(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 1, !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint contains duplicate operand_no
+define void @duplicate_operand_no(ptr %p) {
+ call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %p, i64 8, i1 false), !mem.cache_hint !{
+ i32 0, !{!"nvvm.l1_eviction", !"first"},
+ i32 0, !{!"nvvm.l1_eviction", !"last"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint hint node must be a metadata node
+define void @hint_node_not_mdnode(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !"not_a_node"}
+ ret void
+}
+
+; CHECK: !mem.cache_hint hint node must have even number of operands
+define void @hint_node_odd_operands(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint key must be a string
+define void @key_not_string(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{i32 0, !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint hint node contains duplicate key
+define void @duplicate_key(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{
+ !"nvvm.l1_eviction", !"first",
+ !"nvvm.l1_eviction", !"last"}}
+ ret void
+}
More information about the llvm-commits
mailing list