[llvm] [IR] Composable and Extensible Memory Cache Control Hints (PR #181612)

Fei Peng via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 23 20:18:53 PDT 2026


https://github.com/fiigii updated https://github.com/llvm/llvm-project/pull/181612

>From d13d82f01e778f1938595aa3571e56de2b459b20 Mon Sep 17 00:00:00 2001
From: Fei Peng <feip at nvidia.com>
Date: Mon, 16 Feb 2026 00:48:41 -0800
Subject: [PATCH 1/2] [IR] Composable and Extensible Memory Cache Control Hints

Add target-agnostic infrastructure for the !mem.cache_hint metadata kind, as proposed in the RFC: https://discourse.llvm.org/t/rfc-composable-and-extensible-memory-cache-control-hints-in-llvm-ir/89443

This patch includes:
- Registration of mem.cache_hint in FixedMetadataKinds (MD ID 49)
- IR Verifier validation of structural constraints
- Metadata helper support in combineMetadata(), copyMetadataForLoad(),
  and dropUBImplyingAttrsAndMetadata()
- LangRef documentation for the metadata format and semantics
- Verifier and transform pass test coverage (GVN, InstCombine,
  SimplifyCFG)

Assisted-by: Claude Code
---
 llvm/docs/LangRef.rst                         | 66 +++++++++++++++++
 llvm/include/llvm/IR/FixedMetadataKinds.def   |  1 +
 llvm/lib/IR/Instruction.cpp                   |  8 ++-
 llvm/lib/IR/Verifier.cpp                      | 70 +++++++++++++++++++
 llvm/lib/Transforms/Utils/Local.cpp           |  6 ++
 llvm/test/Transforms/GVN/mem-cache-hint.ll    | 42 +++++++++++
 .../Transforms/InstCombine/mem-cache-hint.ll  | 15 ++++
 .../Transforms/SimplifyCFG/mem-cache-hint.ll  | 25 +++++++
 llvm/test/Verifier/mem-cache-hint.ll          | 68 ++++++++++++++++++
 9 files changed, 298 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/GVN/mem-cache-hint.ll
 create mode 100644 llvm/test/Transforms/InstCombine/mem-cache-hint.ll
 create mode 100644 llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
 create mode 100644 llvm/test/Verifier/mem-cache-hint.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 13883883d3981..e332836bd7b3c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7768,6 +7768,72 @@ The ``!captures`` attribute makes no statement about other uses of ``%x``, or
 uses of the stored-to memory location after it has been overwritten with a
 different value.
 
+'``mem.cache_hint``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``!mem.cache_hint`` metadata may be attached to any instruction that reads
+or writes memory. It provides target-specific cache control hints for the
+memory operation. This metadata is a performance hint: dropping or ignoring it
+must not change the observable behavior of the program.
+
+The value of ``!mem.cache_hint`` is a single metadata node containing a flat
+list of ``(operand_no, hint_node)`` pairs. Each ``operand_no`` is an ``i32``
+constant identifying a memory-object operand (not a raw IR operand). Each
+``hint_node`` is a metadata node containing target-prefixed key/value string
+pairs.
+
+The ``!mem.cache_hint`` node must contain an even number of entries, alternating
+``i32`` operand numbers and metadata nodes. Operand numbers must be unique within
+a ``!mem.cache_hint`` node and must be valid for the instruction. Keys within a
+single hint node must also be unique.
+
+Most instructions have a single memory-object operand (``operand_no = 0``).
+Copy-like instructions such as ``llvm.memcpy`` conceptually access multiple
+memory objects: e.g., destination is ``operand_no = 0`` and source is
+``operand_no = 1``.
+
+The hint node keys are prefixed with a target identifier (e.g., ``nvvm.``) and
+their interpretation is entirely target-dependent. The IR verifier enforces only
+the structural rules above; validation of target-specific keys and values is
+performed by the corresponding backend. Unsupported properties may be silently
+ignored during code generation.
+
+The following examples use ``nvvm.`` prefixed keys for NVIDIA GPU targets.
+Other targets may define their own prefixed keys.
+
+Example: load with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+    %v = load i32, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+    !0 = !{ i32 0, !1 }
+    !1 = !{ !"nvvm.l1_eviction", !"first",
+            !"nvvm.l2_eviction", !"first",
+            !"nvvm.l2_prefetch_size", !"128B" }
+
+Example: store with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+    store i32 %v, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+    !0 = !{ i32 0, !1 }
+    !1 = !{ !"nvvm.l1_eviction", !"last",
+            !"nvvm.l2_eviction", !"last" }
+
+Example: memcpy with per-operand hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+    call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %d, ptr addrspace(1) %s,
+                                      i64 16, i1 false), !mem.cache_hint !0
+
+    !0 = !{ i32 0, !1, i32 1, !2 }
+    !1 = !{ !"nvvm.l1_eviction", !"last" }
+    !2 = !{ !"nvvm.l1_eviction", !"first",
+            !"nvvm.l2_prefetch_size", !"128B" }
+
 .. _llvm.loop:
 
 '``llvm.loop``'
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index 0d79677d7079e..b8cacb21f82ef 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -60,3 +60,4 @@ LLVM_FIXED_MD_KIND(MD_alloc_token, "alloc_token", 45)
 LLVM_FIXED_MD_KIND(MD_implicit_ref, "implicit.ref", 46)
 LLVM_FIXED_MD_KIND(MD_nofpclass, "nofpclass", 47)
 LLVM_FIXED_MD_KIND(MD_call_target, "call_target", 48)
+LLVM_FIXED_MD_KIND(MD_mem_cache_hint, "mem.cache_hint", 49)
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 5205d36a228c1..f7450734d65ab 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -573,12 +573,14 @@ void Instruction::dropUBImplyingAttrsAndMetadata(ArrayRef<unsigned> Keep) {
   // !annotation and !prof metadata does not impact semantics.
   // !range, !nonnull and !align produce poison, so they are safe to speculate.
   // !fpmath specifies floating-point precision and does not imply UB.
+  // !mem.cache_hint is a performance hint and does not imply UB.
   // !noundef and various AA metadata must be dropped, as it generally produces
   // immediate undefined behavior.
   static const unsigned KnownIDs[] = {
-      LLVMContext::MD_annotation, LLVMContext::MD_range,
-      LLVMContext::MD_nonnull,    LLVMContext::MD_align,
-      LLVMContext::MD_fpmath,     LLVMContext::MD_prof};
+      LLVMContext::MD_annotation,    LLVMContext::MD_range,
+      LLVMContext::MD_nonnull,       LLVMContext::MD_align,
+      LLVMContext::MD_fpmath,        LLVMContext::MD_prof,
+      LLVMContext::MD_mem_cache_hint};
   SmallVector<unsigned> KeepIDs;
   KeepIDs.reserve(Keep.size() + std::size(KnownIDs));
   append_range(KeepIDs, (!ProfcheckDisableMetadataFixes ? KnownIDs
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index f4d4f81c12124..de636947bce23 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -549,6 +549,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   void visitAccessGroupMetadata(const MDNode *MD);
   void visitCapturesMetadata(Instruction &I, const MDNode *Captures);
   void visitAllocTokenMetadata(Instruction &I, MDNode *MD);
+  void visitMemCacheHintMetadata(Instruction &I, MDNode *MD);
 
   template <class Ty> bool isValidMetadataArray(const MDTuple &N);
 #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -5599,6 +5600,72 @@ void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) {
         "expected integer constant", MD);
 }
 
+void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
+  Check(I.mayReadOrWriteMemory(),
+        "!mem.cache_hint is only valid on memory operations", &I);
+
+  Check(MD->getNumOperands() % 2 == 0,
+        "!mem.cache_hint must have even number of operands "
+        "(operand_no, hint_node pairs)",
+        MD);
+
+  auto IsMemoryObjectOperand = [](const Value *V) {
+    return V->getType()->isPtrOrPtrVectorTy();
+  };
+
+  unsigned NumMemoryObjectOperands = 0;
+  if (const auto *CB = dyn_cast<CallBase>(&I))
+    NumMemoryObjectOperands = count_if(CB->args(), [&](const Use &Arg) {
+      return IsMemoryObjectOperand(Arg.get());
+    });
+  else
+    NumMemoryObjectOperands = count_if(I.operands(), [&](const Use &Op) {
+      return IsMemoryObjectOperand(Op.get());
+    });
+
+  SmallVector<unsigned, 4> SeenOperandNos;
+
+  // Top-level metadata alternates: i32 operand_no, MDNode hint_node.
+  for (unsigned i = 0; i + 1 < MD->getNumOperands(); i += 2) {
+    auto *OpNoCI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
+    Check(OpNoCI,
+          "!mem.cache_hint operand_no must be an integer constant in pair", MD);
+
+    Check(OpNoCI->getValue().isNonNegative(),
+          "!mem.cache_hint operand_no must be non-negative", MD);
+
+    uint64_t OperandNo = OpNoCI->getZExtValue();
+    Check(OperandNo < NumMemoryObjectOperands,
+          "!mem.cache_hint operand_no must refer to a valid memory object "
+          "operand",
+          &I);
+
+    Check(!is_contained(SeenOperandNos, OperandNo),
+          "!mem.cache_hint contains duplicate operand_no", MD);
+    SeenOperandNos.push_back(OperandNo);
+
+    const auto *Node = dyn_cast<MDNode>(MD->getOperand(i + 1));
+    Check(Node, "!mem.cache_hint hint node must be a metadata node", MD);
+
+    Check(Node->getNumOperands() % 2 == 0,
+          "!mem.cache_hint hint node must have even number of operands "
+          "(key-value pairs)",
+          Node);
+
+    SmallVector<StringRef, 8> SeenKeys;
+    for (unsigned j = 0; j + 1 < Node->getNumOperands(); j += 2) {
+      const auto *Key = dyn_cast<MDString>(Node->getOperand(j));
+      Check(Key, "!mem.cache_hint key must be a string", Node);
+
+      StringRef KeyStr = Key->getString();
+      Check(!is_contained(SeenKeys, KeyStr),
+            "!mem.cache_hint hint node contains duplicate key", Node);
+      SeenKeys.push_back(KeyStr);
+      // Values are target-specific and not validated here.
+    }
+  }
+}
+
 /// verifyInstruction - Verify that an instruction is well formed.
 ///
 void Verifier::visitInstruction(Instruction &I) {
@@ -5831,6 +5898,9 @@ void Verifier::visitInstruction(Instruction &I) {
   if (MDNode *MD = I.getMetadata(LLVMContext::MD_alloc_token))
     visitAllocTokenMetadata(I, MD);
 
+  if (MDNode *MD = I.getMetadata(LLVMContext::MD_mem_cache_hint))
+    visitMemCacheHintMetadata(I, MD);
+
   if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
     CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
     visitMDNode(*N, AreDebugLocsAllowed::Yes);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 84c0989a7fe07..5a7097e951c61 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3032,6 +3032,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
         if (!AAOnly)
           K->setMetadata(Kind, JMD);
         break;
+      case LLVMContext::MD_mem_cache_hint:
+        // Preserve !mem.cache_hint if it is present on both instructions.
+        if (!AAOnly)
+          K->setMetadata(Kind, JMD);
+        break;
       case LLVMContext::MD_noalias_addrspace:
         if (DoesKMove)
           K->setMetadata(Kind,
@@ -3142,6 +3147,7 @@ void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
     case LLVMContext::MD_alias_scope:
     case LLVMContext::MD_noalias:
     case LLVMContext::MD_nontemporal:
+    case LLVMContext::MD_mem_cache_hint:
     case LLVMContext::MD_mem_parallel_loop_access:
     case LLVMContext::MD_access_group:
     case LLVMContext::MD_noundef:
diff --git a/llvm/test/Transforms/GVN/mem-cache-hint.ll b/llvm/test/Transforms/GVN/mem-cache-hint.ll
new file mode 100644
index 0000000000000..2bac96e2a8e8c
--- /dev/null
+++ b/llvm/test/Transforms/GVN/mem-cache-hint.ll
@@ -0,0 +1,42 @@
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+; Test that combineMetadata() preserves !mem.cache_hint only when present on
+; both instructions being merged.
+
+; Both loads have !mem.cache_hint → preserved after GVN deduplication.
+; CHECK-LABEL: @both_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @both_hint(ptr %p) {
+  %a = load i64, ptr %p, !mem.cache_hint !0
+  %b = load i64, ptr %p, !mem.cache_hint !0
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; Only one load has !mem.cache_hint → dropped after GVN deduplication.
+; CHECK-LABEL: @one_hint
+; CHECK: load
+; CHECK-NOT: !mem.cache_hint
+define i64 @one_hint(ptr %p) {
+  %a = load i64, ptr %p
+  %b = load i64, ptr %p, !mem.cache_hint !0
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; Both loads have !mem.cache_hint but with different payloads
+; The merged result is currently undefined. 
+; TODO: delegate to TTI to let targets decide how to merge differing payloads.
+; CHECK-LABEL: @diff_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @diff_hint(ptr %p) {
+  %a = load i64, ptr %p, !mem.cache_hint !0
+  %b = load i64, ptr %p, !mem.cache_hint !2
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
+!2 = !{ i32 0, !3 }
+!3 = !{ !"nvvm.l1_eviction", !"last" }
diff --git a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
new file mode 100644
index 0000000000000..771a74de9b15a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
@@ -0,0 +1,15 @@
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+; Test that copyMetadataForLoad() preserves !mem.cache_hint when a load is
+; transformed (e.g., load+bitcast folded into a single load).
+
+; CHECK-LABEL: @cast_load_preserve_hint
+; CHECK: load i32, ptr %p{{.*}} !mem.cache_hint
+define i32 @cast_load_preserve_hint(ptr %p) {
+  %l = load float, ptr %p, !mem.cache_hint !0
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
new file mode 100644
index 0000000000000..81bd0f8c12f2a
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
@@ -0,0 +1,25 @@
+; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
+
+; Test that SimplifyCFG speculates the conditional load and that
+; dropUBImplyingAttrsAndMetadata() keeps !mem.cache_hint on the
+; speculated load. mem.cache_hint is a performance hint and does not
+; imply UB, so it is safe to preserve.
+
+; CHECK-LABEL: @speculate_keeps_hint
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr %p{{.*}} !mem.cache_hint
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 %c, i32 [[V]], i32 0
+; CHECK-NEXT:    ret i32 [[SEL]]
+define i32 @speculate_keeps_hint(i1 %c, ptr dereferenceable(4) align 4 %p) {
+entry:
+  br i1 %c, label %if, label %join
+if:
+  %v = load i32, ptr %p, !mem.cache_hint !0
+  br label %join
+join:
+  %phi = phi i32 [ %v, %if ], [ 0, %entry ]
+  ret i32 %phi
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Verifier/mem-cache-hint.ll b/llvm/test/Verifier/mem-cache-hint.ll
new file mode 100644
index 0000000000000..5b6af20682b20
--- /dev/null
+++ b/llvm/test/Verifier/mem-cache-hint.ll
@@ -0,0 +1,68 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+declare void @foo(i32, i32)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly, ptr noalias readonly, i64, i1 immarg)
+
+; CHECK: !mem.cache_hint is only valid on memory operations
+define void @non_memory_op(i32 %x, i32 %y) {
+  %z = add i32 %x, %y, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint must have even number of operands
+define void @odd_top_level_operands(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0}
+  ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must be an integer constant in pair
+define void @operand_no_not_integer(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{!"zero", !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_not_pointer(i32 %x, i32 %y) {
+  call void @foo(i32 %x, i32 %y), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_out_of_range(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 1, !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint contains duplicate operand_no
+define void @duplicate_operand_no(ptr %p) {
+  call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %p, i64 8, i1 false), !mem.cache_hint !{
+      i32 0, !{!"nvvm.l1_eviction", !"first"},
+      i32 0, !{!"nvvm.l1_eviction", !"last"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint hint node must be a metadata node
+define void @hint_node_not_mdnode(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !"not_a_node"}
+  ret void
+}
+
+; CHECK: !mem.cache_hint hint node must have even number of operands
+define void @hint_node_odd_operands(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint key must be a string
+define void @key_not_string(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{i32 0, !"first"}}
+  ret void
+}
+
+; CHECK: !mem.cache_hint hint node contains duplicate key
+define void @duplicate_key(ptr %p) {
+  %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{
+      !"nvvm.l1_eviction", !"first",
+      !"nvvm.l1_eviction", !"last"}}
+  ret void
+}

>From 02e57f05ce6460e14ccbd87ba067898bbb4f784d Mon Sep 17 00:00:00 2001
From: Fei Peng <feip at nvidia.com>
Date: Mon, 23 Mar 2026 16:06:49 -0700
Subject: [PATCH 2/2] [IR] Address review comments for !mem.cache_hint metadata

- Forbid !mem.cache_hint on non-intrinsic calls to avoid IR validity
  depending on function attributes
- Simplify count_if lambda to use Value* directly for CallBase args
- Use SmallDenseSet instead of SmallVector for duplicate detection
- Merge standalone test files into existing common test files
  (GVN/metadata.ll, InstCombine/loadstore-metadata.ll,
  SimplifyCFG/hoist-with-metadata.ll) and regenerate CHECK lines with
  update_test_checks.py
- Add verifier test for non-intrinsic call restriction
---
 llvm/lib/IR/Verifier.cpp                      | 22 ++++----
 llvm/test/Transforms/GVN/mem-cache-hint.ll    | 42 ---------------
 llvm/test/Transforms/GVN/metadata.ll          | 51 +++++++++++++++++++
 .../InstCombine/loadstore-metadata.ll         | 17 +++++++
 .../Transforms/InstCombine/mem-cache-hint.ll  | 15 ------
 .../SimplifyCFG/hoist-with-metadata.ll        | 22 ++++++++
 .../Transforms/SimplifyCFG/mem-cache-hint.ll  | 25 ---------
 llvm/test/Verifier/mem-cache-hint.ll          | 12 +++--
 8 files changed, 110 insertions(+), 96 deletions(-)
 delete mode 100644 llvm/test/Transforms/GVN/mem-cache-hint.ll
 delete mode 100644 llvm/test/Transforms/InstCombine/mem-cache-hint.ll
 delete mode 100644 llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index de636947bce23..81e45b9886410 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5614,16 +5614,18 @@ void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
   };
 
   unsigned NumMemoryObjectOperands = 0;
-  if (const auto *CB = dyn_cast<CallBase>(&I))
-    NumMemoryObjectOperands = count_if(CB->args(), [&](const Use &Arg) {
-      return IsMemoryObjectOperand(Arg.get());
-    });
-  else
+  if (const auto *CB = dyn_cast<CallBase>(&I)) {
+    Check(CB->getIntrinsicID() != Intrinsic::not_intrinsic,
+          "!mem.cache_hint is not supported on non-intrinsic calls", &I);
+    NumMemoryObjectOperands = count_if(
+        CB->args(), [&](Value *Arg) { return IsMemoryObjectOperand(Arg); });
+  } else {
     NumMemoryObjectOperands = count_if(I.operands(), [&](const Use &Op) {
       return IsMemoryObjectOperand(Op.get());
     });
+  }
 
-  SmallVector<unsigned, 4> SeenOperandNos;
+  SmallDenseSet<unsigned, 4> SeenOperandNos;
 
   // Top-level metadata alternates: i32 operand_no, MDNode hint_node.
   for (unsigned i = 0; i + 1 < MD->getNumOperands(); i += 2) {
@@ -5640,9 +5642,8 @@ void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
           "operand",
           &I);
 
-    Check(!is_contained(SeenOperandNos, OperandNo),
+    Check(SeenOperandNos.insert(OperandNo).second,
           "!mem.cache_hint contains duplicate operand_no", MD);
-    SeenOperandNos.push_back(OperandNo);
 
     const auto *Node = dyn_cast<MDNode>(MD->getOperand(i + 1));
     Check(Node, "!mem.cache_hint hint node must be a metadata node", MD);
@@ -5652,15 +5653,14 @@ void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
           "(key-value pairs)",
           Node);
 
-    SmallVector<StringRef, 8> SeenKeys;
+    SmallDenseSet<StringRef, 8> SeenKeys;
     for (unsigned j = 0; j + 1 < Node->getNumOperands(); j += 2) {
       const auto *Key = dyn_cast<MDString>(Node->getOperand(j));
       Check(Key, "!mem.cache_hint key must be a string", Node);
 
       StringRef KeyStr = Key->getString();
-      Check(!is_contained(SeenKeys, KeyStr),
+      Check(SeenKeys.insert(KeyStr).second,
             "!mem.cache_hint hint node contains duplicate key", Node);
-      SeenKeys.push_back(KeyStr);
       // Values are target-specific and not validated here.
     }
   }
diff --git a/llvm/test/Transforms/GVN/mem-cache-hint.ll b/llvm/test/Transforms/GVN/mem-cache-hint.ll
deleted file mode 100644
index 2bac96e2a8e8c..0000000000000
--- a/llvm/test/Transforms/GVN/mem-cache-hint.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: opt -passes=gvn -S < %s | FileCheck %s
-
-; Test that combineMetadata() preserves !mem.cache_hint only when present on
-; both instructions being merged.
-
-; Both loads have !mem.cache_hint → preserved after GVN deduplication.
-; CHECK-LABEL: @both_hint
-; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
-define i64 @both_hint(ptr %p) {
-  %a = load i64, ptr %p, !mem.cache_hint !0
-  %b = load i64, ptr %p, !mem.cache_hint !0
-  %c = add i64 %a, %b
-  ret i64 %c
-}
-
-; Only one load has !mem.cache_hint → dropped after GVN deduplication.
-; CHECK-LABEL: @one_hint
-; CHECK: load
-; CHECK-NOT: !mem.cache_hint
-define i64 @one_hint(ptr %p) {
-  %a = load i64, ptr %p
-  %b = load i64, ptr %p, !mem.cache_hint !0
-  %c = add i64 %a, %b
-  ret i64 %c
-}
-
-; Both loads have !mem.cache_hint but with different payloads
-; The merged result is currently undefined. 
-; TODO: delegate to TTI to let targets decide how to merge differing payloads.
-; CHECK-LABEL: @diff_hint
-; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
-define i64 @diff_hint(ptr %p) {
-  %a = load i64, ptr %p, !mem.cache_hint !0
-  %b = load i64, ptr %p, !mem.cache_hint !2
-  %c = add i64 %a, %b
-  ret i64 %c
-}
-
-!0 = !{ i32 0, !1 }
-!1 = !{ !"nvvm.l1_eviction", !"first" }
-!2 = !{ i32 0, !3 }
-!3 = !{ !"nvvm.l1_eviction", !"last" }
diff --git a/llvm/test/Transforms/GVN/metadata.ll b/llvm/test/Transforms/GVN/metadata.ll
index ff055d889eac2..d1435b8bb5a04 100644
--- a/llvm/test/Transforms/GVN/metadata.ll
+++ b/llvm/test/Transforms/GVN/metadata.ll
@@ -485,6 +485,49 @@ join:
   ret void
 }
 
+; Both loads have !mem.cache_hint → preserved after GVN deduplication.
+define i64 @test_mem_cache_hint_both(ptr %p) {
+; CHECK-LABEL: define i64 @test_mem_cache_hint_both
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i64, ptr [[P]], align 4, !mem.cache_hint [[META11:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i64 [[A]], [[A]]
+; CHECK-NEXT:    ret i64 [[C]]
+;
+  %a = load i64, ptr %p, !mem.cache_hint !12
+  %b = load i64, ptr %p, !mem.cache_hint !12
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; Only one load has !mem.cache_hint → dropped after GVN deduplication.
+define i64 @test_mem_cache_hint_one(ptr %p) {
+; CHECK-LABEL: define i64 @test_mem_cache_hint_one
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i64, ptr [[P]], align 4
+; CHECK-NEXT:    [[C:%.*]] = add i64 [[A]], [[A]]
+; CHECK-NEXT:    ret i64 [[C]]
+;
+  %a = load i64, ptr %p
+  %b = load i64, ptr %p, !mem.cache_hint !12
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; Both loads have !mem.cache_hint but with different payloads.
+; TODO: delegate to TTI to let targets decide how to merge differing payloads.
+define i64 @test_mem_cache_hint_diff(ptr %p) {
+; CHECK-LABEL: define i64 @test_mem_cache_hint_diff
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i64, ptr [[P]], align 4, !mem.cache_hint [[META13:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i64 [[A]], [[A]]
+; CHECK-NEXT:    ret i64 [[C]]
+;
+  %a = load i64, ptr %p, !mem.cache_hint !12
+  %b = load i64, ptr %p, !mem.cache_hint !14
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
 !0 = !{i32 0, i32 2}
 !1 = !{i32 3, i32 5}
 !2 = !{i32 2, i32 5}
@@ -497,6 +540,10 @@ join:
 !9 = !{i32 1, i32 5}
 !10 = !{i32 5, i32 1}
 !11 = !{}
+!12 = !{ i32 0, !13 }
+!13 = !{ !"nvvm.l1_eviction", !"first" }
+!14 = !{ i32 0, !15 }
+!15 = !{ !"nvvm.l1_eviction", !"last" }
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { memory(none) }
 ;.
@@ -511,4 +558,8 @@ join:
 ; CHECK: [[RNG8]] = !{i64 0, i64 10}
 ; CHECK: [[RNG9]] = !{i64 0, i64 10, i64 20, i64 30}
 ; CHECK: [[RNG10]] = !{i64 10, i64 30}
+; CHECK: [[META11]] = !{i32 0, [[META12:![0-9]+]]}
+; CHECK: [[META12]] = !{!"nvvm.l1_eviction", !"first"}
+; CHECK: [[META13]] = !{i32 0, [[META14:![0-9]+]]}
+; CHECK: [[META14]] = !{!"nvvm.l1_eviction", !"last"}
 ;.
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index ec485edbb709b..ab270b2ee366f 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -396,6 +396,19 @@ entry:
   ret <1 x float> %c
 }
 
+define i32 @test_load_cast_combine_mem_cache_hint(ptr %ptr) {
+; CHECK-LABEL: define i32 @test_load_cast_combine_mem_cache_hint(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !mem.cache_hint [[META17:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[L1]]
+;
+entry:
+  %l = load float, ptr %ptr, !mem.cache_hint !17
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
 !0 = !{!1, !1, i64 0}
 !1 = !{!"scalar type", !2}
 !2 = !{!"root"}
@@ -413,6 +426,8 @@ entry:
 !14 = !{!15}
 !15 = distinct !{!15, !16}
 !16 = distinct !{!16}
+!17 = !{ i32 0, !18 }
+!18 = !{ !"nvvm.l1_eviction", !"first" }
 
 ;.
 ; CHECK: [[SCALAR_TYPE_TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0}
@@ -432,4 +447,6 @@ entry:
 ; CHECK: [[META14]] = distinct !{[[META14]]}
 ; CHECK: [[ACC_GRP15]] = distinct !{}
 ; CHECK: [[META16]] = !{i32 3}
+; CHECK: [[META17]] = !{i32 0, [[META18:![0-9]+]]}
+; CHECK: [[META18]] = !{!"nvvm.l1_eviction", !"first"}
 ;.
diff --git a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
deleted file mode 100644
index 771a74de9b15a..0000000000000
--- a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt -passes=instcombine -S < %s | FileCheck %s
-
-; Test that copyMetadataForLoad() preserves !mem.cache_hint when a load is
-; transformed (e.g., load+bitcast folded into a single load).
-
-; CHECK-LABEL: @cast_load_preserve_hint
-; CHECK: load i32, ptr %p{{.*}} !mem.cache_hint
-define i32 @cast_load_preserve_hint(ptr %p) {
-  %l = load float, ptr %p, !mem.cache_hint !0
-  %c = bitcast float %l to i32
-  ret i32 %c
-}
-
-!0 = !{ i32 0, !1 }
-!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
index 26e87393e2e7b..9649d34b0fa54 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
@@ -635,6 +635,24 @@ out:
 }
 
 
+define void @hoist_mem_cache_hint(i1 %c, ptr %p) {
+; CHECK-LABEL: @hoist_mem_cache_hint(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    [[T:%.*]] = load i32, ptr [[P:%.*]], align 4, !mem.cache_hint [[META13:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+then:
+  %t = load i32, ptr %p, !mem.cache_hint !10
+  br label %out
+else:
+  %e = load i32, ptr %p, !mem.cache_hint !10
+  br label %out
+out:
+  ret void
+}
+
 !0 = !{ i8 0, i8 1 }
 !1 = !{ i8 3, i8 5 }
 !2 = !{}
@@ -645,6 +663,8 @@ out:
 !7 = !{i32 4, i32 8, i32 20, i32 31}
 !8 = !{i32 2, i32 5}
 !9 = !{i32 2, i32 5, i32 22, i32 42, i32 45, i32 50}
+!10 = !{ i32 0, !11 }
+!11 = !{ !"nvvm.l1_eviction", !"first" }
 
 ;.
 ; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5}
@@ -660,4 +680,6 @@ out:
 ; CHECK: [[META10]] = !{!"address", !"read_provenance"}
 ; CHECK: [[META11]] = !{!"provenance"}
 ; CHECK: [[META12]] = !{i32 3}
+; CHECK: [[META13]] = !{i32 0, [[META14:![0-9]+]]}
+; CHECK: [[META14]] = !{!"nvvm.l1_eviction", !"first"}
 ;.
diff --git a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
deleted file mode 100644
index 81bd0f8c12f2a..0000000000000
--- a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
-
-; Test that SimplifyCFG speculates the conditional load and that
-; dropUBImplyingAttrsAndMetadata() keeps !mem.cache_hint on the
-; speculated load. mem.cache_hint is a performance hint and does not
-; imply UB, so it is safe to preserve.
-
-; CHECK-LABEL: @speculate_keeps_hint
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[V:%.*]] = load i32, ptr %p{{.*}} !mem.cache_hint
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 %c, i32 [[V]], i32 0
-; CHECK-NEXT:    ret i32 [[SEL]]
-define i32 @speculate_keeps_hint(i1 %c, ptr dereferenceable(4) align 4 %p) {
-entry:
-  br i1 %c, label %if, label %join
-if:
-  %v = load i32, ptr %p, !mem.cache_hint !0
-  br label %join
-join:
-  %phi = phi i32 [ %v, %if ], [ 0, %entry ]
-  ret i32 %phi
-}
-
-!0 = !{ i32 0, !1 }
-!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Verifier/mem-cache-hint.ll b/llvm/test/Verifier/mem-cache-hint.ll
index 5b6af20682b20..736aef59bcb3a 100644
--- a/llvm/test/Verifier/mem-cache-hint.ll
+++ b/llvm/test/Verifier/mem-cache-hint.ll
@@ -1,6 +1,6 @@
 ; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
 
-declare void @foo(i32, i32)
+declare void @foo(ptr)
 declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly, ptr noalias readonly, i64, i1 immarg)
 
 ; CHECK: !mem.cache_hint is only valid on memory operations
@@ -21,9 +21,15 @@ define void @operand_no_not_integer(ptr %p) {
   ret void
 }
 
+; CHECK: !mem.cache_hint is not supported on non-intrinsic calls
+define void @non_intrinsic_call(ptr %p) {
+  call void @foo(ptr %p), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+  ret void
+}
+
 ; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
-define void @operand_no_not_pointer(i32 %x, i32 %y) {
-  call void @foo(i32 %x, i32 %y), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+define void @operand_no_not_pointer(ptr %d, ptr %s) {
+  call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 8, i1 false), !mem.cache_hint !{i32 2, !{!"nvvm.l1_eviction", !"first"}}
   ret void
 }
 



More information about the llvm-commits mailing list