[llvm] [IR] Composable and Extensible Memory Cache Control Hints (PR #181612)
Fei Peng via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 23 20:18:53 PDT 2026
https://github.com/fiigii updated https://github.com/llvm/llvm-project/pull/181612
From d13d82f01e778f1938595aa3571e56de2b459b20 Mon Sep 17 00:00:00 2001
From: Fei Peng <feip at nvidia.com>
Date: Mon, 16 Feb 2026 00:48:41 -0800
Subject: [PATCH 1/2] [IR] Composable and Extensible Memory Cache Control Hints
Add target-agnostic infrastructure for the !mem.cache_hint metadata kind, as proposed in the RFC: https://discourse.llvm.org/t/rfc-composable-and-extensible-memory-cache-control-hints-in-llvm-ir/89443
This patch includes:
- Registration of mem.cache_hint in FixedMetadataKinds (MD ID 49)
- IR Verifier validation of structural constraints
- Metadata helper support in combineMetadata(), copyMetadataForLoad(),
and dropUBImplyingAttrsAndMetadata()
- LangRef documentation for the metadata format and semantics
- Verifier and transform pass test coverage (GVN, InstCombine,
SimplifyCFG)
Assisted-by: Claude Code
---
llvm/docs/LangRef.rst | 66 +++++++++++++++++
llvm/include/llvm/IR/FixedMetadataKinds.def | 1 +
llvm/lib/IR/Instruction.cpp | 8 ++-
llvm/lib/IR/Verifier.cpp | 70 +++++++++++++++++++
llvm/lib/Transforms/Utils/Local.cpp | 6 ++
llvm/test/Transforms/GVN/mem-cache-hint.ll | 42 +++++++++++
.../Transforms/InstCombine/mem-cache-hint.ll | 15 ++++
.../Transforms/SimplifyCFG/mem-cache-hint.ll | 25 +++++++
llvm/test/Verifier/mem-cache-hint.ll | 68 ++++++++++++++++++
9 files changed, 298 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/GVN/mem-cache-hint.ll
create mode 100644 llvm/test/Transforms/InstCombine/mem-cache-hint.ll
create mode 100644 llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
create mode 100644 llvm/test/Verifier/mem-cache-hint.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 13883883d3981..e332836bd7b3c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7768,6 +7768,72 @@ The ``!captures`` attribute makes no statement about other uses of ``%x``, or
uses of the stored-to memory location after it has been overwritten with a
different value.
+'``mem.cache_hint``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``!mem.cache_hint`` metadata may be attached to any instruction that reads
+or writes memory, except non-intrinsic calls. It provides target-specific cache
+control hints for the memory operation. This metadata is a performance hint:
+dropping or ignoring it must not change the observable behavior of the program.
+
+The value of ``!mem.cache_hint`` is a single metadata node containing a flat
+list of ``(operand_no, hint_node)`` pairs. Each ``operand_no`` is an ``i32``
+constant identifying a memory-object operand (not a raw IR operand). Each
+``hint_node`` is a metadata node containing target-prefixed key/value string
+pairs.
+
+The ``!mem.cache_hint`` node must contain an even number of entries, alternating
+``i32`` operand numbers and metadata nodes. Operand numbers must be unique within
+a ``!mem.cache_hint`` node and must be valid for the instruction. Keys within a
+single hint node must also be unique.
+
+Most instructions have a single memory-object operand (``operand_no = 0``).
+Copy-like instructions such as ``llvm.memcpy`` conceptually access multiple
+memory objects: e.g., destination is ``operand_no = 0`` and source is
+``operand_no = 1``.
+
+The hint node keys are prefixed with a target identifier (e.g., ``nvvm.``) and
+their interpretation is entirely target-dependent. The IR verifier enforces only
+the structural rules above; validation of target-specific keys and values is
+performed by the corresponding backend. Unsupported properties may be silently
+ignored during code generation.
+
+The following examples use ``nvvm.`` prefixed keys for NVIDIA GPU targets.
+Other targets may define their own prefixed keys.
+
+Example: load with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+ %v = load i32, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+ !0 = !{ i32 0, !1 }
+ !1 = !{ !"nvvm.l1_eviction", !"first",
+ !"nvvm.l2_eviction", !"first",
+ !"nvvm.l2_prefetch_size", !"128B" }
+
+Example: store with cache hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+ store i32 %v, ptr addrspace(1) %p, align 4, !mem.cache_hint !0
+
+ !0 = !{ i32 0, !1 }
+ !1 = !{ !"nvvm.l1_eviction", !"last",
+ !"nvvm.l2_eviction", !"last" }
+
+Example: memcpy with per-operand hints (NVIDIA GPU):
+
+.. code-block:: llvm
+
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %d, ptr addrspace(1) %s,
+ i64 16, i1 false), !mem.cache_hint !0
+
+ !0 = !{ i32 0, !1, i32 1, !2 }
+ !1 = !{ !"nvvm.l1_eviction", !"last" }
+ !2 = !{ !"nvvm.l1_eviction", !"first",
+ !"nvvm.l2_prefetch_size", !"128B" }
+
.. _llvm.loop:
'``llvm.loop``'
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index 0d79677d7079e..b8cacb21f82ef 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -60,3 +60,4 @@ LLVM_FIXED_MD_KIND(MD_alloc_token, "alloc_token", 45)
LLVM_FIXED_MD_KIND(MD_implicit_ref, "implicit.ref", 46)
LLVM_FIXED_MD_KIND(MD_nofpclass, "nofpclass", 47)
LLVM_FIXED_MD_KIND(MD_call_target, "call_target", 48)
+LLVM_FIXED_MD_KIND(MD_mem_cache_hint, "mem.cache_hint", 49)
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 5205d36a228c1..f7450734d65ab 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -573,12 +573,14 @@ void Instruction::dropUBImplyingAttrsAndMetadata(ArrayRef<unsigned> Keep) {
// !annotation and !prof metadata does not impact semantics.
// !range, !nonnull and !align produce poison, so they are safe to speculate.
// !fpmath specifies floating-point precision and does not imply UB.
+ // !mem.cache_hint is a performance hint and does not imply UB.
// !noundef and various AA metadata must be dropped, as it generally produces
// immediate undefined behavior.
static const unsigned KnownIDs[] = {
- LLVMContext::MD_annotation, LLVMContext::MD_range,
- LLVMContext::MD_nonnull, LLVMContext::MD_align,
- LLVMContext::MD_fpmath, LLVMContext::MD_prof};
+ LLVMContext::MD_annotation, LLVMContext::MD_range,
+ LLVMContext::MD_nonnull, LLVMContext::MD_align,
+ LLVMContext::MD_fpmath, LLVMContext::MD_prof,
+ LLVMContext::MD_mem_cache_hint};
SmallVector<unsigned> KeepIDs;
KeepIDs.reserve(Keep.size() + std::size(KnownIDs));
append_range(KeepIDs, (!ProfcheckDisableMetadataFixes ? KnownIDs
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index f4d4f81c12124..de636947bce23 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -549,6 +549,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
void visitAccessGroupMetadata(const MDNode *MD);
void visitCapturesMetadata(Instruction &I, const MDNode *Captures);
void visitAllocTokenMetadata(Instruction &I, MDNode *MD);
+ void visitMemCacheHintMetadata(Instruction &I, MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -5599,6 +5600,72 @@ void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) {
"expected integer constant", MD);
}
+void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
+ Check(I.mayReadOrWriteMemory(),
+ "!mem.cache_hint is only valid on memory operations", &I);
+
+ Check(MD->getNumOperands() % 2 == 0,
+ "!mem.cache_hint must have even number of operands "
+ "(operand_no, hint_node pairs)",
+ MD);
+
+ auto IsMemoryObjectOperand = [](const Value *V) {
+ return V->getType()->isPtrOrPtrVectorTy();
+ };
+
+ unsigned NumMemoryObjectOperands = 0;
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ NumMemoryObjectOperands = count_if(CB->args(), [&](const Use &Arg) {
+ return IsMemoryObjectOperand(Arg.get());
+ });
+ else
+ NumMemoryObjectOperands = count_if(I.operands(), [&](const Use &Op) {
+ return IsMemoryObjectOperand(Op.get());
+ });
+
+ SmallVector<unsigned, 4> SeenOperandNos;
+
+ // Top-level metadata alternates: i32 operand_no, MDNode hint_node.
+ for (unsigned i = 0; i + 1 < MD->getNumOperands(); i += 2) {
+ auto *OpNoCI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
+ Check(OpNoCI,
+ "!mem.cache_hint operand_no must be an integer constant in pair", MD);
+
+ Check(OpNoCI->getValue().isNonNegative(),
+ "!mem.cache_hint operand_no must be non-negative", MD);
+
+ uint64_t OperandNo = OpNoCI->getZExtValue();
+ Check(OperandNo < NumMemoryObjectOperands,
+ "!mem.cache_hint operand_no must refer to a valid memory object "
+ "operand",
+ &I);
+
+ Check(!is_contained(SeenOperandNos, OperandNo),
+ "!mem.cache_hint contains duplicate operand_no", MD);
+ SeenOperandNos.push_back(OperandNo);
+
+ const auto *Node = dyn_cast<MDNode>(MD->getOperand(i + 1));
+ Check(Node, "!mem.cache_hint hint node must be a metadata node", MD);
+
+ Check(Node->getNumOperands() % 2 == 0,
+ "!mem.cache_hint hint node must have even number of operands "
+ "(key-value pairs)",
+ Node);
+
+ SmallVector<StringRef, 8> SeenKeys;
+ for (unsigned j = 0; j + 1 < Node->getNumOperands(); j += 2) {
+ const auto *Key = dyn_cast<MDString>(Node->getOperand(j));
+ Check(Key, "!mem.cache_hint key must be a string", Node);
+
+ StringRef KeyStr = Key->getString();
+ Check(!is_contained(SeenKeys, KeyStr),
+ "!mem.cache_hint hint node contains duplicate key", Node);
+ SeenKeys.push_back(KeyStr);
+ // Values are target-specific and not validated here.
+ }
+ }
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -5831,6 +5898,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_alloc_token))
visitAllocTokenMetadata(I, MD);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_mem_cache_hint))
+ visitMemCacheHintMetadata(I, MD);
+
if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
visitMDNode(*N, AreDebugLocsAllowed::Yes);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 84c0989a7fe07..5a7097e951c61 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3032,6 +3032,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
if (!AAOnly)
K->setMetadata(Kind, JMD);
break;
+ case LLVMContext::MD_mem_cache_hint:
+ // Preserve !mem.cache_hint if it is present on both instructions.
+ if (!AAOnly)
+ K->setMetadata(Kind, JMD);
+ break;
case LLVMContext::MD_noalias_addrspace:
if (DoesKMove)
K->setMetadata(Kind,
@@ -3142,6 +3147,7 @@ void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
case LLVMContext::MD_alias_scope:
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_cache_hint:
case LLVMContext::MD_mem_parallel_loop_access:
case LLVMContext::MD_access_group:
case LLVMContext::MD_noundef:
diff --git a/llvm/test/Transforms/GVN/mem-cache-hint.ll b/llvm/test/Transforms/GVN/mem-cache-hint.ll
new file mode 100644
index 0000000000000..2bac96e2a8e8c
--- /dev/null
+++ b/llvm/test/Transforms/GVN/mem-cache-hint.ll
@@ -0,0 +1,42 @@
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+; Test that combineMetadata() preserves !mem.cache_hint only when present on
+; both instructions being merged.
+
+; Both loads have !mem.cache_hint → preserved after GVN deduplication.
+; CHECK-LABEL: @both_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @both_hint(ptr %p) {
+ %a = load i64, ptr %p, !mem.cache_hint !0
+ %b = load i64, ptr %p, !mem.cache_hint !0
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; Only one load has !mem.cache_hint → dropped after GVN deduplication.
+; CHECK-LABEL: @one_hint
+; CHECK: load
+; CHECK-NOT: !mem.cache_hint
+define i64 @one_hint(ptr %p) {
+ %a = load i64, ptr %p
+ %b = load i64, ptr %p, !mem.cache_hint !0
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; Both loads have !mem.cache_hint but with different payloads
+; The merged result is currently undefined.
+; TODO: delegate to TTI to let targets decide how to merge differing payloads.
+; CHECK-LABEL: @diff_hint
+; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
+define i64 @diff_hint(ptr %p) {
+ %a = load i64, ptr %p, !mem.cache_hint !0
+ %b = load i64, ptr %p, !mem.cache_hint !2
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
+!2 = !{ i32 0, !3 }
+!3 = !{ !"nvvm.l1_eviction", !"last" }
diff --git a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
new file mode 100644
index 0000000000000..771a74de9b15a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
@@ -0,0 +1,15 @@
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+; Test that copyMetadataForLoad() preserves !mem.cache_hint when a load is
+; transformed (e.g., load+bitcast folded into a single load).
+
+; CHECK-LABEL: @cast_load_preserve_hint
+; CHECK: load i32, ptr %p{{.*}} !mem.cache_hint
+define i32 @cast_load_preserve_hint(ptr %p) {
+ %l = load float, ptr %p, !mem.cache_hint !0
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
new file mode 100644
index 0000000000000..81bd0f8c12f2a
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
@@ -0,0 +1,25 @@
+; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
+
+; Test that SimplifyCFG speculates the conditional load and that
+; dropUBImplyingAttrsAndMetadata() keeps !mem.cache_hint on the
+; speculated load. mem.cache_hint is a performance hint and does not
+; imply UB, so it is safe to preserve.
+
+; CHECK-LABEL: @speculate_keeps_hint
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr %p{{.*}} !mem.cache_hint
+; CHECK-NEXT: [[SEL:%.*]] = select i1 %c, i32 [[V]], i32 0
+; CHECK-NEXT: ret i32 [[SEL]]
+define i32 @speculate_keeps_hint(i1 %c, ptr dereferenceable(4) align 4 %p) {
+entry:
+ br i1 %c, label %if, label %join
+if:
+ %v = load i32, ptr %p, !mem.cache_hint !0
+ br label %join
+join:
+ %phi = phi i32 [ %v, %if ], [ 0, %entry ]
+ ret i32 %phi
+}
+
+!0 = !{ i32 0, !1 }
+!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Verifier/mem-cache-hint.ll b/llvm/test/Verifier/mem-cache-hint.ll
new file mode 100644
index 0000000000000..5b6af20682b20
--- /dev/null
+++ b/llvm/test/Verifier/mem-cache-hint.ll
@@ -0,0 +1,68 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+declare void @foo(i32, i32)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly, ptr noalias readonly, i64, i1 immarg)
+
+; CHECK: !mem.cache_hint is only valid on memory operations
+define void @non_memory_op(i32 %x, i32 %y) {
+ %z = add i32 %x, %y, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint must have even number of operands
+define void @odd_top_level_operands(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0}
+ ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must be an integer constant in pair
+define void @operand_no_not_integer(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{!"zero", !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_not_pointer(i32 %x, i32 %y) {
+ call void @foo(i32 %x, i32 %y), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
+define void @operand_no_out_of_range(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 1, !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint contains duplicate operand_no
+define void @duplicate_operand_no(ptr %p) {
+ call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %p, i64 8, i1 false), !mem.cache_hint !{
+ i32 0, !{!"nvvm.l1_eviction", !"first"},
+ i32 0, !{!"nvvm.l1_eviction", !"last"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint hint node must be a metadata node
+define void @hint_node_not_mdnode(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !"not_a_node"}
+ ret void
+}
+
+; CHECK: !mem.cache_hint hint node must have even number of operands
+define void @hint_node_odd_operands(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint key must be a string
+define void @key_not_string(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{i32 0, !"first"}}
+ ret void
+}
+
+; CHECK: !mem.cache_hint hint node contains duplicate key
+define void @duplicate_key(ptr %p) {
+ %v = load i32, ptr %p, !mem.cache_hint !{i32 0, !{
+ !"nvvm.l1_eviction", !"first",
+ !"nvvm.l1_eviction", !"last"}}
+ ret void
+}
From 02e57f05ce6460e14ccbd87ba067898bbb4f784d Mon Sep 17 00:00:00 2001
From: Fei Peng <feip at nvidia.com>
Date: Mon, 23 Mar 2026 16:06:49 -0700
Subject: [PATCH 2/2] [IR] Address review comments for !mem.cache_hint metadata
- Forbid !mem.cache_hint on non-intrinsic calls to avoid IR validity
depending on function attributes
- Simplify count_if lambda to use Value* directly for CallBase args
- Use SmallDenseSet instead of SmallVector for duplicate detection
- Merge standalone test files into existing common test files
(loadstore-metadata.ll, hoist-with-metadata.ll) and regenerate
CHECK lines with update_test_checks.py
- Add verifier test for non-intrinsic call restriction
---
llvm/lib/IR/Verifier.cpp | 22 ++++----
llvm/test/Transforms/GVN/mem-cache-hint.ll | 42 ---------------
llvm/test/Transforms/GVN/metadata.ll | 51 +++++++++++++++++++
.../InstCombine/loadstore-metadata.ll | 17 +++++++
.../Transforms/InstCombine/mem-cache-hint.ll | 15 ------
.../SimplifyCFG/hoist-with-metadata.ll | 22 ++++++++
.../Transforms/SimplifyCFG/mem-cache-hint.ll | 25 ---------
llvm/test/Verifier/mem-cache-hint.ll | 12 +++--
8 files changed, 110 insertions(+), 96 deletions(-)
delete mode 100644 llvm/test/Transforms/GVN/mem-cache-hint.ll
delete mode 100644 llvm/test/Transforms/InstCombine/mem-cache-hint.ll
delete mode 100644 llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index de636947bce23..81e45b9886410 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5614,16 +5614,18 @@ void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
};
unsigned NumMemoryObjectOperands = 0;
- if (const auto *CB = dyn_cast<CallBase>(&I))
- NumMemoryObjectOperands = count_if(CB->args(), [&](const Use &Arg) {
- return IsMemoryObjectOperand(Arg.get());
- });
- else
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ Check(CB->getIntrinsicID() != Intrinsic::not_intrinsic,
+ "!mem.cache_hint is not supported on non-intrinsic calls", &I);
+ NumMemoryObjectOperands = count_if(
+ CB->args(), [&](Value *Arg) { return IsMemoryObjectOperand(Arg); });
+ } else {
NumMemoryObjectOperands = count_if(I.operands(), [&](const Use &Op) {
return IsMemoryObjectOperand(Op.get());
});
+ }
- SmallVector<unsigned, 4> SeenOperandNos;
+ SmallDenseSet<unsigned, 4> SeenOperandNos;
// Top-level metadata alternates: i32 operand_no, MDNode hint_node.
for (unsigned i = 0; i + 1 < MD->getNumOperands(); i += 2) {
@@ -5640,9 +5642,8 @@ void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
"operand",
&I);
- Check(!is_contained(SeenOperandNos, OperandNo),
+ Check(SeenOperandNos.insert(OperandNo).second,
"!mem.cache_hint contains duplicate operand_no", MD);
- SeenOperandNos.push_back(OperandNo);
const auto *Node = dyn_cast<MDNode>(MD->getOperand(i + 1));
Check(Node, "!mem.cache_hint hint node must be a metadata node", MD);
@@ -5652,15 +5653,14 @@ void Verifier::visitMemCacheHintMetadata(Instruction &I, MDNode *MD) {
"(key-value pairs)",
Node);
- SmallVector<StringRef, 8> SeenKeys;
+ SmallDenseSet<StringRef, 8> SeenKeys;
for (unsigned j = 0; j + 1 < Node->getNumOperands(); j += 2) {
const auto *Key = dyn_cast<MDString>(Node->getOperand(j));
Check(Key, "!mem.cache_hint key must be a string", Node);
StringRef KeyStr = Key->getString();
- Check(!is_contained(SeenKeys, KeyStr),
+ Check(SeenKeys.insert(KeyStr).second,
"!mem.cache_hint hint node contains duplicate key", Node);
- SeenKeys.push_back(KeyStr);
// Values are target-specific and not validated here.
}
}
diff --git a/llvm/test/Transforms/GVN/mem-cache-hint.ll b/llvm/test/Transforms/GVN/mem-cache-hint.ll
deleted file mode 100644
index 2bac96e2a8e8c..0000000000000
--- a/llvm/test/Transforms/GVN/mem-cache-hint.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: opt -passes=gvn -S < %s | FileCheck %s
-
-; Test that combineMetadata() preserves !mem.cache_hint only when present on
-; both instructions being merged.
-
-; Both loads have !mem.cache_hint → preserved after GVN deduplication.
-; CHECK-LABEL: @both_hint
-; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
-define i64 @both_hint(ptr %p) {
- %a = load i64, ptr %p, !mem.cache_hint !0
- %b = load i64, ptr %p, !mem.cache_hint !0
- %c = add i64 %a, %b
- ret i64 %c
-}
-
-; Only one load has !mem.cache_hint → dropped after GVN deduplication.
-; CHECK-LABEL: @one_hint
-; CHECK: load
-; CHECK-NOT: !mem.cache_hint
-define i64 @one_hint(ptr %p) {
- %a = load i64, ptr %p
- %b = load i64, ptr %p, !mem.cache_hint !0
- %c = add i64 %a, %b
- ret i64 %c
-}
-
-; Both loads have !mem.cache_hint but with different payloads
-; The merged result is currently undefined.
-; TODO: delegate to TTI to let targets decide how to merge differing payloads.
-; CHECK-LABEL: @diff_hint
-; CHECK: load i64, ptr %p{{.*}} !mem.cache_hint
-define i64 @diff_hint(ptr %p) {
- %a = load i64, ptr %p, !mem.cache_hint !0
- %b = load i64, ptr %p, !mem.cache_hint !2
- %c = add i64 %a, %b
- ret i64 %c
-}
-
-!0 = !{ i32 0, !1 }
-!1 = !{ !"nvvm.l1_eviction", !"first" }
-!2 = !{ i32 0, !3 }
-!3 = !{ !"nvvm.l1_eviction", !"last" }
diff --git a/llvm/test/Transforms/GVN/metadata.ll b/llvm/test/Transforms/GVN/metadata.ll
index ff055d889eac2..d1435b8bb5a04 100644
--- a/llvm/test/Transforms/GVN/metadata.ll
+++ b/llvm/test/Transforms/GVN/metadata.ll
@@ -485,6 +485,49 @@ join:
ret void
}
+; Both loads have !mem.cache_hint → preserved after GVN deduplication.
+define i64 @test_mem_cache_hint_both(ptr %p) {
+; CHECK-LABEL: define i64 @test_mem_cache_hint_both
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[A:%.*]] = load i64, ptr [[P]], align 4, !mem.cache_hint [[META11:![0-9]+]]
+; CHECK-NEXT: [[C:%.*]] = add i64 [[A]], [[A]]
+; CHECK-NEXT: ret i64 [[C]]
+;
+ %a = load i64, ptr %p, !mem.cache_hint !12
+ %b = load i64, ptr %p, !mem.cache_hint !12
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; Only one load has !mem.cache_hint → dropped after GVN deduplication.
+define i64 @test_mem_cache_hint_one(ptr %p) {
+; CHECK-LABEL: define i64 @test_mem_cache_hint_one
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[A:%.*]] = load i64, ptr [[P]], align 4
+; CHECK-NEXT: [[C:%.*]] = add i64 [[A]], [[A]]
+; CHECK-NEXT: ret i64 [[C]]
+;
+ %a = load i64, ptr %p
+ %b = load i64, ptr %p, !mem.cache_hint !12
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; Both loads have !mem.cache_hint but with different payloads.
+; TODO: delegate to TTI to let targets decide how to merge differing payloads.
+define i64 @test_mem_cache_hint_diff(ptr %p) {
+; CHECK-LABEL: define i64 @test_mem_cache_hint_diff
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[A:%.*]] = load i64, ptr [[P]], align 4, !mem.cache_hint [[META13:![0-9]+]]
+; CHECK-NEXT: [[C:%.*]] = add i64 [[A]], [[A]]
+; CHECK-NEXT: ret i64 [[C]]
+;
+ %a = load i64, ptr %p, !mem.cache_hint !12
+ %b = load i64, ptr %p, !mem.cache_hint !14
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
!0 = !{i32 0, i32 2}
!1 = !{i32 3, i32 5}
!2 = !{i32 2, i32 5}
@@ -497,6 +540,10 @@ join:
!9 = !{i32 1, i32 5}
!10 = !{i32 5, i32 1}
!11 = !{}
+!12 = !{ i32 0, !13 }
+!13 = !{ !"nvvm.l1_eviction", !"first" }
+!14 = !{ i32 0, !15 }
+!15 = !{ !"nvvm.l1_eviction", !"last" }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { memory(none) }
;.
@@ -511,4 +558,8 @@ join:
; CHECK: [[RNG8]] = !{i64 0, i64 10}
; CHECK: [[RNG9]] = !{i64 0, i64 10, i64 20, i64 30}
; CHECK: [[RNG10]] = !{i64 10, i64 30}
+; CHECK: [[META11]] = !{i32 0, [[META12:![0-9]+]]}
+; CHECK: [[META12]] = !{!"nvvm.l1_eviction", !"first"}
+; CHECK: [[META13]] = !{i32 0, [[META14:![0-9]+]]}
+; CHECK: [[META14]] = !{!"nvvm.l1_eviction", !"last"}
;.
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index ec485edbb709b..ab270b2ee366f 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -396,6 +396,19 @@ entry:
ret <1 x float> %c
}
+define i32 @test_load_cast_combine_mem_cache_hint(ptr %ptr) {
+; CHECK-LABEL: define i32 @test_load_cast_combine_mem_cache_hint(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR]], align 4, !mem.cache_hint [[META17:![0-9]+]]
+; CHECK-NEXT: ret i32 [[L1]]
+;
+entry:
+ %l = load float, ptr %ptr, !mem.cache_hint !17
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
!0 = !{!1, !1, i64 0}
!1 = !{!"scalar type", !2}
!2 = !{!"root"}
@@ -413,6 +426,8 @@ entry:
!14 = !{!15}
!15 = distinct !{!15, !16}
!16 = distinct !{!16}
+!17 = !{ i32 0, !18 }
+!18 = !{ !"nvvm.l1_eviction", !"first" }
;.
; CHECK: [[SCALAR_TYPE_TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0}
@@ -432,4 +447,6 @@ entry:
; CHECK: [[META14]] = distinct !{[[META14]]}
; CHECK: [[ACC_GRP15]] = distinct !{}
; CHECK: [[META16]] = !{i32 3}
+; CHECK: [[META17]] = !{i32 0, [[META18:![0-9]+]]}
+; CHECK: [[META18]] = !{!"nvvm.l1_eviction", !"first"}
;.
diff --git a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll b/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
deleted file mode 100644
index 771a74de9b15a..0000000000000
--- a/llvm/test/Transforms/InstCombine/mem-cache-hint.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt -passes=instcombine -S < %s | FileCheck %s
-
-; Test that copyMetadataForLoad() preserves !mem.cache_hint when a load is
-; transformed (e.g., load+bitcast folded into a single load).
-
-; CHECK-LABEL: @cast_load_preserve_hint
-; CHECK: load i32, ptr %p{{.*}} !mem.cache_hint
-define i32 @cast_load_preserve_hint(ptr %p) {
- %l = load float, ptr %p, !mem.cache_hint !0
- %c = bitcast float %l to i32
- ret i32 %c
-}
-
-!0 = !{ i32 0, !1 }
-!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
index 26e87393e2e7b..9649d34b0fa54 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
@@ -635,6 +635,24 @@ out:
}
+define void @hoist_mem_cache_hint(i1 %c, ptr %p) {
+; CHECK-LABEL: @hoist_mem_cache_hint(
+; CHECK-NEXT: if:
+; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[P:%.*]], align 4, !mem.cache_hint [[META13:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+if:
+ br i1 %c, label %then, label %else
+then:
+ %t = load i32, ptr %p, !mem.cache_hint !10
+ br label %out
+else:
+ %e = load i32, ptr %p, !mem.cache_hint !10
+ br label %out
+out:
+ ret void
+}
+
!0 = !{ i8 0, i8 1 }
!1 = !{ i8 3, i8 5 }
!2 = !{}
@@ -645,6 +663,8 @@ out:
!7 = !{i32 4, i32 8, i32 20, i32 31}
!8 = !{i32 2, i32 5}
!9 = !{i32 2, i32 5, i32 22, i32 42, i32 45, i32 50}
+!10 = !{ i32 0, !11 }
+!11 = !{ !"nvvm.l1_eviction", !"first" }
;.
; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5}
@@ -660,4 +680,6 @@ out:
; CHECK: [[META10]] = !{!"address", !"read_provenance"}
; CHECK: [[META11]] = !{!"provenance"}
; CHECK: [[META12]] = !{i32 3}
+; CHECK: [[META13]] = !{i32 0, [[META14:![0-9]+]]}
+; CHECK: [[META14]] = !{!"nvvm.l1_eviction", !"first"}
;.
diff --git a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll b/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
deleted file mode 100644
index 81bd0f8c12f2a..0000000000000
--- a/llvm/test/Transforms/SimplifyCFG/mem-cache-hint.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
-
-; Test that SimplifyCFG speculates the conditional load and that
-; dropUBImplyingAttrsAndMetadata() keeps !mem.cache_hint on the
-; speculated load. mem.cache_hint is a performance hint and does not
-; imply UB, so it is safe to preserve.
-
-; CHECK-LABEL: @speculate_keeps_hint
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[V:%.*]] = load i32, ptr %p{{.*}} !mem.cache_hint
-; CHECK-NEXT: [[SEL:%.*]] = select i1 %c, i32 [[V]], i32 0
-; CHECK-NEXT: ret i32 [[SEL]]
-define i32 @speculate_keeps_hint(i1 %c, ptr dereferenceable(4) align 4 %p) {
-entry:
- br i1 %c, label %if, label %join
-if:
- %v = load i32, ptr %p, !mem.cache_hint !0
- br label %join
-join:
- %phi = phi i32 [ %v, %if ], [ 0, %entry ]
- ret i32 %phi
-}
-
-!0 = !{ i32 0, !1 }
-!1 = !{ !"nvvm.l1_eviction", !"first" }
diff --git a/llvm/test/Verifier/mem-cache-hint.ll b/llvm/test/Verifier/mem-cache-hint.ll
index 5b6af20682b20..736aef59bcb3a 100644
--- a/llvm/test/Verifier/mem-cache-hint.ll
+++ b/llvm/test/Verifier/mem-cache-hint.ll
@@ -1,6 +1,6 @@
; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
-declare void @foo(i32, i32)
+declare void @foo(ptr)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly, ptr noalias readonly, i64, i1 immarg)
; CHECK: !mem.cache_hint is only valid on memory operations
@@ -21,9 +21,15 @@ define void @operand_no_not_integer(ptr %p) {
ret void
}
+; CHECK: !mem.cache_hint is not supported on non-intrinsic calls
+define void @non_intrinsic_call(ptr %p) {
+ call void @foo(ptr %p), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+ ret void
+}
+
; CHECK: !mem.cache_hint operand_no must refer to a valid memory object operand
-define void @operand_no_not_pointer(i32 %x, i32 %y) {
- call void @foo(i32 %x, i32 %y), !mem.cache_hint !{i32 0, !{!"nvvm.l1_eviction", !"first"}}
+define void @operand_no_not_pointer(ptr %d, ptr %s) {
+ call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 8, i1 false), !mem.cache_hint !{i32 2, !{!"nvvm.l1_eviction", !"first"}}
ret void
}
More information about the llvm-commits
mailing list