[llvm] [IR] Composable and Extensible Memory Cache Control Hints (PR #181612)

Fei Peng via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 16 00:49:20 PST 2026


https://github.com/fiigii created https://github.com/llvm/llvm-project/pull/181612

Add target-agnostic infrastructure for the !mem.cache_hint metadata kind, as proposed in the RFC: https://discourse.llvm.org/t/rfc-composable-and-extensible-memory-cache-control-hints-in-llvm-ir/89443

This patch includes:
- Registration of mem.cache_hint in FixedMetadataKinds (MD ID 48)
- IR Verifier validation of structural constraints
- Metadata helper support in combineMetadata(), copyMetadataForLoad(), and dropUBImplyingAttrsAndMetadata()
- LangRef documentation for the metadata format and semantics
- Verifier and transform pass test coverage (GVN, InstCombine, SimplifyCFG)

Assisted-by: Claude Code

>From 113d96e2370d376ce8b88507d00f0ec488ca3e3e Mon Sep 17 00:00:00 2001
From: Fei Peng <feip at nvidia.com>
Date: Mon, 16 Feb 2026 00:48:41 -0800
Subject: [PATCH] [IR] Composable and Extensible Memory Cache Control Hints

Add target-agnostic infrastructure for the !mem.cache_hint metadata kind, as proposed in the RFC: https://discourse.llvm.org/t/rfc-composable-and-extensible-memory-cache-control-hints-in-llvm-ir/89443

This patch includes:
- Registration of mem.cache_hint in FixedMetadataKinds (MD ID 48)
- IR Verifier validation of structural constraints
- Metadata helper support in combineMetadata(), copyMetadataForLoad(),
  and dropUBImplyingAttrsAndMetadata()
- LangRef documentation for the metadata format and semantics
- Verifier and transform pass test coverage (GVN, InstCombine,
  SimplifyCFG)

Assisted-by: Claude Code
---
 llvm/docs/LangRef.rst                         | 66 +++++++++++++++++
 llvm/include/llvm/IR/FixedMetadataKinds.def   |  1 +
 llvm/lib/IR/Instruction.cpp                   |  8 ++-
 llvm/lib/IR/Verifier.cpp                      | 70 +++++++++++++++++++
 llvm/lib/Transforms/Utils/Local.cpp           |  6 ++
 llvm/test/Transforms/GVN/mem-cache-hint.ll    | 42 +++++++++++
 .../Transforms/InstCombine/mem-cache-hint.ll  | 15 ++++
 .../Transforms/SimplifyCFG/mem-cache-hint.ll  | 25 +++++++
 llvm/test/Verifier/mem-cache-hint.ll          | 68 ++++++++++++++++++
 9 files changed, 298 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/GVN/mem-cache-hint.ll
 create mode 100644 llvm/test/Transforms/InstCombine/mem-cache-hint.ll
 create mode 100644 llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
 create mode 100644 llvm/test/Verifier/mem-cache-hint.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 50a2515f69189..50d16eee64e87 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7690,6 +7690,72 @@ The ``!captures`` attribute makes no statement about other uses of ``%x``, or
 uses of the stored-to memory location after it has been overwritten with a
 different value.
 
+'``mem.cache_hint``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``!mem.cache_hint`` metadata may be attached to any instruction that reads
+or writes memory. It provides target-specific cache control hints for the
+memory operation. This metadata is a performance hint: dropping or ignoring it
+must not change the observable behavior of the program.
+
+The value of ``!mem.cache_hint`` is a single metadata node containing a flat
+list of ``(operand_no, hint_node)`` pairs. Each ``operand_no`` is an ``i32``
+constant that indexes the instruction's memory-object operands (it is not a
+raw IR operand index). Each ``hint_node`` is a metadata node containing
+target-prefixed key/value string pairs.
+
+The ``!mem.cache_hint`` node must contain an even number of entries, alternating
+``i32`` operand numbers and metadata nodes. Operand numbers must be unique within
+a ``!mem.cache_hint`` node and must be valid for the instruction. Keys within a
+single hint node must also be unique.
+
+Most instructions have a single memory-object operand (``operand_no = 0``).
+Copy-like instructions such as ``llvm.memcpy`` conceptually access multiple
+memory objects: e.g., destination is ``operand_no = 0`` and source is
+``operand_no = 1``.
+
+The hint node keys are prefixed with a target identifier (e.g., ``nvvm.``) and
+their interpretation is entirely target-dependent. The IR verifier enforces only
+the structural rules above; validation of target-specific keys and values is
+performed by the corresponding backend. Unsupported properties may be silently
+ignored during code generation.
+
+The following examples use ``nvvm.`` prefixed keys for NVIDIA GPU targets.
+Other targets may define their own prefixed keys.
+
+Example: load with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+    %v = load i32, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+    !0 = !{ i32 0, !1 }
+    !1 = !{ !"nvvm.l1_eviction", !"first",
+            !"nvvm.l2_eviction", !"first",
+            !"nvvm.l2_prefetch_size", !"128B" }
+
+Example: store with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+    store i32 %v, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+    !0 = !{ i32 0, !1 }
+    !1 = !{ !"nvvm.l1_eviction", !"last",
+            !"nvvm.l2_eviction", !"last" }
+
+Example: memcpy with per-operand hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+    call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %d, ptr addrspace(1) %s,
+                                      i64 16, i1 false), !mem.cache_hint !0
+
+    !0 = !{ i32 0, !1, i32 1, !2 }
+    !1 = !{ !"nvvm.l1_eviction", !"last" }
+    !2 = !{ !"nvvm.l1_eviction", !"first",
+            !"nvvm.l2_prefetch_size", !"128B" }
+
 .. _llvm.loop:
 
 '``llvm.loop``'
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index 98129985714b2..fa71e29366d0c 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -59,3 +59,4 @@ LLVM_FIXED_MD_KIND(MD_captures, "captures", 44)
 LLVM_FIXED_MD_KIND(MD_alloc_token, "alloc_token", 45)
 LLVM_FIXED_MD_KIND(MD_implicit_ref, "implicit.ref", 46)
 LLVM_FIXED_MD_KIND(MD_nofpclass, "nofpclass", 47)
+LLVM_FIXED_MD_KIND(MD_mem_cache_hint, "mem.cache_hint", 48)
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 67fad4028df5f..4abaa7711c8e6 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -573,12 +573,14 @@ void Instruction::dropUBImplyingAttrsAndMetadata(ArrayRef<unsigned> Keep) {
   // !annotation and !prof metadata does not impact semantics.
   // !range, !nonnull and !align produce poison, so they are safe to speculate.
   // !fpmath specifies floating-point precision and does not imply UB.
+  // !mem.cache_hint is a performance hint and does not imply UB.
   // !noundef and various AA metadata must be dropped, as it generally produces
   // immediate undefined behavior.
   static const unsigned KnownIDs[] = {
-      LLVMContext::MD_annotation, LLVMContext::MD_range,
-      LLVMContext::MD_nonnull,    LLVMContext::MD_align,
-      LLVMContext::MD_fpmath,     LLVMContext::MD_prof};
+      LLVMContext::MD_annotation,    LLVMContext::MD_range,
+      LLVMContext::MD_nonnull,       LLVMContext::MD_align,
+      LLVMContext::MD_fpmath,        LLVMContext::MD_prof,
+      LLVMContext::MD_mem_cache_hint};
   SmallVector<unsigned> KeepIDs;
   KeepIDs.reserve(Keep.size() + std::size(KnownIDs));
   append_range(KeepIDs, (!ProfcheckDisableMetadataFixes ? KnownIDs
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index f986f5406b2b3..8ad0874777135 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -546,6 +546,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   void visitAccessGroupMetadata(const MDNode *MD);
   void visitCapturesMetadata(Instruction &I, const MDNode *Captures);
   void visitAllocTokenMetadata(Instruction &I, MDNode *MD);
+  void visitMemCacheHintMetadata(Instruction &I, MDNode *MD);
 
   template <class Ty> bool isValidMetadataArray(const MDTuple &N);
 #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -5576,6 +5577,72 @@ void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) {
         "expected integer constant", MD);
 }
 
+void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
+  Check(I.mayReadOrWriteMemory(),
+        "!mem.cache_hint is only valid on memory operations", &I);
+
+  Check(MD->getNumOperands() % 2 == 0,
+        "!mem.cache_hint must have even number of operands "
+        "(operand_no, hint_node pairs)",
+        MD);
+
+  auto IsMemoryObjectOperand = [](const Value *V) {
+    return V->getType()->isPtrOrPtrVectorTy();
+  };
+
+  unsigned NumMemoryObjectOperands = 0;
+  if (const auto *CB = dyn_cast<CallBase>(&I))
+    NumMemoryObjectOperands = count_if(CB->args(), [&](const Use &Arg) {
+      return IsMemoryObjectOperand(Arg.get());
+    });
+  else
+    NumMemoryObjectOperands = count_if(I.operands(), [&](const Use &Op) {
+      return IsMemoryObjectOperand(Op.get());
+    });
+
+  SmallVector<unsigned, 4> SeenOperandNos;
+
+  // Top-level metadata alternates: i32 operand_no, MDNode hint_node.
+  for (unsigned i = 0; i + 1 < MD->getNumOperands(); i += 2) {
+    auto *OpNoCI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
+    Check(OpNoCI,
+          "!mem.cache_hint operand_no must be an integer constant in pair", MD);
+
+    Check(OpNoCI->getValue().isNonNegative(),
+          "!mem.cache_hint operand_no must be non-negative", MD);
+
+    uint64_t OperandNo = OpNoCI->getZExtValue();
+    Check(OperandNo < NumMemoryObjectOperands,
+          "!mem.cache_hint operand_no must refer to a valid memory object "
+          "operand",
+          &I);
+
+    Check(!is_contained(SeenOperandNos, OperandNo),
+          "!mem.cache_hint contains duplicate operand_no", MD);
+    SeenOperandNos.push_back(OperandNo);
+
+    const auto *Node = dyn_cast<MDNode>(MD->getOperand(i + 1));
+    Check(Node, "!mem.cache_hint hint node must be a metadata node", MD);
+
+    Check(Node->getNumOperands() % 2 == 0,
+          "!mem.cache_hint hint node must have even number of operands "
+          "(key-value pairs)",
+          Node);
+
+    SmallVector<StringRef, 8> SeenKeys;
+    for (unsigned j = 0; j + 1 < Node->getNumOperands(); j += 2) {
+      const auto *Key = dyn_cast<MDString>(Node->getOperand(j));
+      Check(Key, "!mem.cache_hint key must be a string", Node);
+
+      StringRef KeyStr = Key->getString();
+      Check(!is_contained(SeenKeys, KeyStr),
+            "!mem.cache_hint hint node contains duplicate key", Node);
+      SeenKeys.push_back(KeyStr);
+      // Values are target-specific and not validated here.
+    }
+  }
+}
+
 /// verifyInstruction - Verify that an instruction is well formed.
 ///
 void Verifier::visitInstruction(Instruction &I) {
@@ -5808,6 +5875,9 @@ void Verifier::visitInstruction(Instruction &I) {
   if (MDNode *MD = I.getMetadata(LLVMContext::MD_alloc_token))
     visitAllocTokenMetadata(I, MD);
 
+  if (MDNode *MD = I.getMetadata(LLVMContext::MD_mem_cache_hint))
+    visitMemCacheHintMetadata(I, MD);
+
   if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
     CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
     visitMDNode(*N, AreDebugLocsAllowed::Yes);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index bd617cd003a76..4b190d5b9f30a 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3038,6 +3038,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
         if (!AAOnly)
           K->setMetadata(Kind, JMD);
         break;
+      case LLVMContext::MD_mem_cache_hint:
+        // Preserve !mem.cache_hint if it is present on both instructions.
+        if (!AAOnly)
+          K->setMetadata(Kind, JMD);
+        break;
       case LLVMContext::MD_noalias_addrspace:
         if (DoesKMove)
           K->setMetadata(Kind,
@@ -3148,6 +3153,7 @@ void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
     case LLVMContext::MD_alias_scope:
     case LLVMContext::MD_noalias:
     case LLVMContext::MD_nontemporal:
+    case LLVMContext::MD_mem_cache_hint:
     case LLVMContext::MD_mem_parallel_loop_access:
     case LLVMContext::MD_access_group:
     case LLVMContext::MD_noundef:
diff --git a/llvm/test/Transforms/GVN/mem-cache-hint.ll b/llvm/test/Transforms/GVN/mem-cache-hint.ll
new file mode 100644
index 0000000000000..2bac96e2a8e8c
--- /dev/null
+++ b/llvm/test/Transforms/GVN/mem-cache-hint.ll
@@ -0,0 +1,42 @@
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+; Test that combineMetadata() preserves !mem.cache_hint only when present on
+; both instructions being merged.
+
+; Both loads have !mem.cache_hint → preserved after GVN deduplication.
+; CHECK-LABEL: @both_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @both_hint(ptr %p) {
+  %a = load i64, ptr %p, !mem.cache_hint !0
+  %b = load i64, ptr %p, !mem.cache_hint !0
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; Only one load has !mem.cache_hint → dropped after GVN deduplication.
+; CHECK-LABEL: @one_hint
+; CHECK: load
+; CHECK-NOT: !mem.cache_hint
+define i64 @one_hint(ptr %p) {
+  %a = load i64, ptr %p
+  %b = load i64, ptr %p, !mem.cache_hint !0
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; Both loads have !mem.cache_hint, but with different payloads.
+; Which payload survives the merge is currently unspecified.
+; TODO: delegate to TTI to let targets decide how to merge differing payloads.
+; CHECK-LABEL: @diff_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @diff_hint(ptr %p) {
+  %a = load i64, ptr %p, !mem.cache_hint !0
+  %b = load i64, ptr %p, !mem.cache_hint !2
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
+!2 = !{ i32 0, !3 }
+!3 = !{ !"nvvm.l1_eviction", !"last" }
diff --git a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
new file mode 100644
index 0000000000000..771a74de9b15a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
@@ -0,0 +1,15 @@
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+; Test that copyMetadataForLoad() preserves !mem.cache_hint when a load is
+; transformed (e.g., load+bitcast folded into a single load).
+
+; CHECK-LABEL: @cast_load_preserve_hint
+; CHECK: load i32, ptr %p{{.*}} !mem.cache_hint
+define i32 @cast_load_preserve_hint(ptr %p) {
+  %l = load float, ptr %p, !mem.cache_hint !0
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
new file mode 100644
index 0000000000000..81bd0f8c12f2a
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
@@ -0,0 +1,25 @@
+; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
+
+; Test that SimplifyCFG speculates the conditional load and that
+; dropUBImplyingAttrsAndMetadata() keeps !mem.cache_hint on the
+; speculated load. mem.cache_hint is a performance hint and does not
+; imply UB, so it is safe to preserve.
+
+; CHECK-LABEL: @speculate_keeps_hint
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr %p{{.*}} !mem.cache_hint
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 %c, i32 [[V]], i32 0
+; CHECK-NEXT:    ret i32 [[SEL]]
+define i32 @speculate_keeps_hint(i1 %c, ptr dereferenceable(4) align 4 %p) {
+entry:
+  br i1 %c, label %if, label %join
+if:
+  %v = load i32, ptr %p, !mem.cache_hint !0
+  br label %join
+join:
+  %phi = phi i32 [ %v, %if ], [ 0, %entry ]
+  ret i32 %phi
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Verifier/mem-cache-hint.ll b/llvm/test/Verifier/mem-cache-hint.ll
new file mode 100644
index 0000000000000..5b6af20682b20
--- /dev/null
+++ b/llvm/test/Verifier/mem-cache-hint.ll
@@ -0,0 +1,68 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+declare void @foo(i32, i32)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly, ptr noalias readonly, i64, i1 immarg)
+
+; CHECK: !mem.cache_hint is only valid on memory operations
+define void @non_memory_op(i32 %x, i32 %y) {
+  %z = add i32 %x, %y, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint must have even number of operands
+define void @odd_top_level_operands(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0}
+  ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must be an integer constant in pair
+define void @operand_no_not_integer(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{!"zero", !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_not_pointer(i32 %x, i32 %y) {
+  call void @foo(i32 %x, i32 %y), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_out_of_range(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 1, !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint contains duplicate operand_no
+define void @duplicate_operand_no(ptr %p) {
+  call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %p, i64 8, i1 false), !mem.cache_hint !{
+      i32 0, !{!"nvvm.l1_eviction", !"first"},
+      i32 0, !{!"nvvm.l1_eviction", !"last"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint hint node must be a metadata node
+define void @hint_node_not_mdnode(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !"not_a_node"}
+  ret void
+}
+
+; CHECK: !mem.cache_hint hint node must have even number of operands
+define void @hint_node_odd_operands(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint key must be a string
+define void @key_not_string(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{i32 0, !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint hint node contains duplicate key
+define void @duplicate_key(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{
+      !"nvvm.l1_eviction", !"first",
+      !"nvvm.l1_eviction", !"last"}}
+  ret void
+}



More information about the llvm-commits mailing list