[llvm] [IR] Introduce `llvm.errno.tbaa` metadata for errno alias disambiguation (PR #160458)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 01:00:49 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Antonio Frighetto (antoniofrighetto)
<details>
<summary>Changes</summary>
Add a new named, module-level, frontend-annotated metadata that specifies the TBAA node for an integer access, which C/C++ `errno` accesses are guaranteed to use (under strict aliasing). This should allow LLVM to prove that the involved memory locations/accesses may not alias `errno`, and thus to perform optimizations around errno-writing libcalls (store-to-load forwarding, among others).
Previous discussion: https://discourse.llvm.org/t/rfc-modelling-errno-memory-effects/82972.
---
Full diff: https://github.com/llvm/llvm-project/pull/160458.diff
9 Files Affected:
- (modified) llvm/docs/LangRef.rst (+22)
- (modified) llvm/include/llvm/IR/Verifier.h (+3-3)
- (modified) llvm/lib/Bitcode/Reader/BitcodeReader.cpp (+1-1)
- (modified) llvm/lib/IR/Verifier.cpp (+42-27)
- (added) llvm/test/Linker/Inputs/errno-tbaa-cxx-metadata.ll (+5)
- (added) llvm/test/Linker/Inputs/errno-tbaa-metadata.ll (+5)
- (added) llvm/test/Linker/link-errno-tbaa-metadata.ll (+8)
- (added) llvm/test/Verifier/errno-tbaa-metadata-1.ll (+5)
- (added) llvm/test/Verifier/errno-tbaa-metadata-2.ll (+9)
``````````diff
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b32a27f9555fd..f64b63e188f62 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -8862,6 +8862,28 @@ For example, the following metadata section contains two library specifiers::
Each library specifier will be handled independently by the consuming linker.
The effect of the library specifiers are defined by the consuming linker.
+'``llvm.errno.tbaa``' Named Metadata
+====================================
+
+The module-level ``!llvm.errno.tbaa`` metadata specifies the TBAA nodes used
+for accessing ``errno``. These nodes are guaranteed to represent int-compatible
+accesses according to C/C++ strict aliasing rules. This should let LLVM alias
+analyses to reason about aliasing with ``errno`` when calling library functions
+that may set ``errno``, allowing optimizations such as store-to-load forwarding
+across such routines.
+
+For example, the following is a valid metadata specifying the TBAA information
+for an integer access:
+
+ !llvm.errno.tbaa = !{!0}
+ !0 = !{!1, !1, i64 0}
+ !1 = !{!"int", !2, i64 0}
+ !2 = !{!"omnipotent char", !3, i64 0}
+ !3 = !{!"Simple C/C++ TBAA"}
+
+Multiple TBAA operands are allowed to support merging of modules that may use
+different TBAA hierarchies (e.g., when mixing C and C++).
+
.. _summary:
ThinLTO Summary
diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index 8dbb9c8a41d7e..342886da6f9ef 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -75,9 +75,9 @@ class TBAAVerifier {
public:
TBAAVerifier(VerifierSupport *Diagnostic = nullptr)
: Diagnostic(Diagnostic) {}
- /// Visit an instruction and return true if it is valid, return false if an
- /// invalid TBAA is attached.
- LLVM_ABI bool visitTBAAMetadata(Instruction &I, const MDNode *MD);
+ /// Visit an instruction, or a TBAA node itself as part of a metadata, and
+ /// return true if it is valid, return false if an invalid TBAA is attached.
+ LLVM_ABI bool visitTBAAMetadata(Instruction *I, const MDNode *MD);
};
/// Check a function for errors, useful for use when debugging a
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 22a0d0ffdbaab..832aa9ff7ed3d 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7024,7 +7024,7 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
if (!MDLoader->isStrippingTBAA()) {
for (auto &I : instructions(F)) {
MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa);
- if (!TBAA || TBAAVerifyHelper.visitTBAAMetadata(I, TBAA))
+ if (!TBAA || TBAAVerifyHelper.visitTBAAMetadata(&I, TBAA))
continue;
MDLoader->setStripTBAA(true);
stripTBAA(F->getParent());
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 9bde965d660a4..9d2e086f9399f 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -480,6 +480,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
visitModuleFlags();
visitModuleIdents();
visitModuleCommandLines();
+ visitModuleErrnoTBAA();
verifyCompileUnits();
@@ -516,6 +517,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
void visitComdat(const Comdat &C);
void visitModuleIdents();
void visitModuleCommandLines();
+ void visitModuleErrnoTBAA();
void visitModuleFlags();
void visitModuleFlag(const MDNode *Op,
DenseMap<const MDString *, const MDNode *> &SeenIDs,
@@ -1815,6 +1817,18 @@ void Verifier::visitModuleCommandLines() {
}
}
+void Verifier::visitModuleErrnoTBAA() {
+ const NamedMDNode *ErrnoTBAA = M.getNamedMetadata("llvm.errno.tbaa");
+ if (!ErrnoTBAA)
+ return;
+
+ Check(ErrnoTBAA->getNumOperands() >= 1,
+ "llvm.errno.tbaa must have at least one operand", ErrnoTBAA);
+
+ for (const MDNode *N : ErrnoTBAA->operands())
+ TBAAVerifyHelper.visitTBAAMetadata(nullptr, N);
+}
+
void Verifier::visitModuleFlags() {
const NamedMDNode *Flags = M.getModuleFlagsMetadata();
if (!Flags) return;
@@ -5537,7 +5551,7 @@ void Verifier::visitInstruction(Instruction &I) {
visitNofreeMetadata(I, MD);
if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa))
- TBAAVerifyHelper.visitTBAAMetadata(I, TBAA);
+ TBAAVerifyHelper.visitTBAAMetadata(&I, TBAA);
if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias))
visitAliasScopeListMetadata(MD);
@@ -7874,21 +7888,22 @@ static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
return isa_and_nonnull<MDNode>(Type->getOperand(0));
}
-bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
- CheckTBAA(MD->getNumOperands() > 0, "TBAA metadata cannot have 0 operands",
- &I, MD);
+bool TBAAVerifier::visitTBAAMetadata(Instruction *I, const MDNode *MD) {
+ CheckTBAA(MD->getNumOperands() > 0, "TBAA metadata cannot have 0 operands", I,
+ MD);
- CheckTBAA(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
- isa<VAArgInst>(I) || isa<AtomicRMWInst>(I) ||
- isa<AtomicCmpXchgInst>(I),
- "This instruction shall not have a TBAA access tag!", &I);
+ if (I)
+ CheckTBAA(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
+ isa<VAArgInst>(I) || isa<AtomicRMWInst>(I) ||
+ isa<AtomicCmpXchgInst>(I),
+ "This instruction shall not have a TBAA access tag!", I);
bool IsStructPathTBAA =
isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
CheckTBAA(IsStructPathTBAA,
"Old-style TBAA is no longer allowed, use struct-path TBAA instead",
- &I);
+ I);
MDNode *BaseNode = dyn_cast_or_null<MDNode>(MD->getOperand(0));
MDNode *AccessType = dyn_cast_or_null<MDNode>(MD->getOperand(1));
@@ -7897,17 +7912,17 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
if (IsNewFormat) {
CheckTBAA(MD->getNumOperands() == 4 || MD->getNumOperands() == 5,
- "Access tag metadata must have either 4 or 5 operands", &I, MD);
+ "Access tag metadata must have either 4 or 5 operands", I, MD);
} else {
CheckTBAA(MD->getNumOperands() < 5,
- "Struct tag metadata must have either 3 or 4 operands", &I, MD);
+ "Struct tag metadata must have either 3 or 4 operands", I, MD);
}
// Check the access size field.
if (IsNewFormat) {
auto *AccessSizeNode = mdconst::dyn_extract_or_null<ConstantInt>(
MD->getOperand(3));
- CheckTBAA(AccessSizeNode, "Access size field must be a constant", &I, MD);
+ CheckTBAA(AccessSizeNode, "Access size field must be a constant", I, MD);
}
// Check the immutability flag.
@@ -7916,27 +7931,27 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
auto *IsImmutableCI = mdconst::dyn_extract_or_null<ConstantInt>(
MD->getOperand(ImmutabilityFlagOpNo));
CheckTBAA(IsImmutableCI,
- "Immutability tag on struct tag metadata must be a constant", &I,
+ "Immutability tag on struct tag metadata must be a constant", I,
MD);
CheckTBAA(
IsImmutableCI->isZero() || IsImmutableCI->isOne(),
- "Immutability part of the struct tag metadata must be either 0 or 1",
- &I, MD);
+ "Immutability part of the struct tag metadata must be either 0 or 1", I,
+ MD);
}
CheckTBAA(BaseNode && AccessType,
"Malformed struct tag metadata: base and access-type "
"should be non-null and point to Metadata nodes",
- &I, MD, BaseNode, AccessType);
+ I, MD, BaseNode, AccessType);
if (!IsNewFormat) {
CheckTBAA(isValidScalarTBAANode(AccessType),
- "Access type node must be a valid scalar type", &I, MD,
+ "Access type node must be a valid scalar type", I, MD,
AccessType);
}
auto *OffsetCI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2));
- CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
+ CheckTBAA(OffsetCI, "Offset must be constant integer", I, MD);
APInt Offset = OffsetCI->getValue();
bool SeenAccessTypeInPath = false;
@@ -7944,17 +7959,17 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
SmallPtrSet<MDNode *, 4> StructPath;
for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
- BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset,
- IsNewFormat)) {
+ BaseNode =
+ getFieldNodeFromTBAABaseNode(*I, BaseNode, Offset, IsNewFormat)) {
if (!StructPath.insert(BaseNode).second) {
- CheckFailed("Cycle detected in struct path", &I, MD);
+ CheckFailed("Cycle detected in struct path", I, MD);
return false;
}
bool Invalid;
unsigned BaseNodeBitWidth;
- std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode,
- IsNewFormat);
+ std::tie(Invalid, BaseNodeBitWidth) =
+ verifyTBAABaseNode(*I, BaseNode, IsNewFormat);
// If the base node is invalid in itself, then we've already printed all the
// errors we wanted to print.
@@ -7964,20 +7979,20 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
SeenAccessTypeInPath |= BaseNode == AccessType;
if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
- CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access",
- &I, MD, &Offset);
+ CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access", I,
+ MD, &Offset);
CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
(BaseNodeBitWidth == 0 && Offset == 0) ||
(IsNewFormat && BaseNodeBitWidth == ~0u),
- "Access bit-width not the same as description bit-width", &I, MD,
+ "Access bit-width not the same as description bit-width", I, MD,
BaseNodeBitWidth, Offset.getBitWidth());
if (IsNewFormat && SeenAccessTypeInPath)
break;
}
- CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
+ CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", I,
MD);
return true;
}
diff --git a/llvm/test/Linker/Inputs/errno-tbaa-cxx-metadata.ll b/llvm/test/Linker/Inputs/errno-tbaa-cxx-metadata.ll
new file mode 100644
index 0000000000000..eefb6d833f636
--- /dev/null
+++ b/llvm/test/Linker/Inputs/errno-tbaa-cxx-metadata.ll
@@ -0,0 +1,5 @@
+!llvm.errno.tbaa = !{!0}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/Linker/Inputs/errno-tbaa-metadata.ll b/llvm/test/Linker/Inputs/errno-tbaa-metadata.ll
new file mode 100644
index 0000000000000..5dd468776bdee
--- /dev/null
+++ b/llvm/test/Linker/Inputs/errno-tbaa-metadata.ll
@@ -0,0 +1,5 @@
+!llvm.errno.tbaa = !{!0}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Linker/link-errno-tbaa-metadata.ll b/llvm/test/Linker/link-errno-tbaa-metadata.ll
new file mode 100644
index 0000000000000..b58373d3acbef
--- /dev/null
+++ b/llvm/test/Linker/link-errno-tbaa-metadata.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-link %S/Inputs/errno-tbaa-metadata.ll %S/Inputs/errno-tbaa-cxx-metadata.ll -S -o - | FileCheck %s --check-prefix=CHECK-MERGE
+; RUN: llvm-link %S/Inputs/errno-tbaa-metadata.ll %S/Inputs/errno-tbaa-metadata.ll -S -o - | FileCheck %s --check-prefix=CHECK-DEDUP
+
+; Ensure merging when linking modules w/ different errno TBAA hierarchies.
+; CHECK-MERGE: !llvm.errno.tbaa = !{![[NODE0:[0-9]+]], ![[NODE1:[0-9]+]]}
+
+; Ensure deduplication when linking modules w/ identical errno TBAA nodes.
+; CHECK-DEDUP: !llvm.errno.tbaa = !{![[NODE:[0-9]+]]}
diff --git a/llvm/test/Verifier/errno-tbaa-metadata-1.ll b/llvm/test/Verifier/errno-tbaa-metadata-1.ll
new file mode 100644
index 0000000000000..0530653309966
--- /dev/null
+++ b/llvm/test/Verifier/errno-tbaa-metadata-1.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: assembly parsed, but does not verify as correct!
+; CHECK-NEXT: llvm.errno.tbaa must have at least one operand
+!llvm.errno.tbaa = !{}
diff --git a/llvm/test/Verifier/errno-tbaa-metadata-2.ll b/llvm/test/Verifier/errno-tbaa-metadata-2.ll
new file mode 100644
index 0000000000000..6b2a4c6e8bda7
--- /dev/null
+++ b/llvm/test/Verifier/errno-tbaa-metadata-2.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: assembly parsed, but does not verify as correct!
+; CHECK-NEXT: Malformed struct tag metadata: base and access-type should be non-null and point to Metadata nodes
+!llvm.errno.tbaa = !{!0}
+!0 = !{!1, i64 0, !1}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
``````````
</details>
https://github.com/llvm/llvm-project/pull/160458
More information about the llvm-commits mailing list