[llvm] [IR][TBAA] Allow multiple fileds with same offset in TBAA struct-path (PR #76356)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 25 02:29:15 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Bushev Dmitry (dybv-sc)
<details>
<summary>Changes</summary>
Support for multiple fields to have same offset in TBAA struct-path metadata nodes. Primary goal is to support union-like structures to participate in TBAA struct-path resolution.
---
Full diff: https://github.com/llvm/llvm-project/pull/76356.diff
5 Files Affected:
- (modified) llvm/include/llvm/IR/Verifier.h (+9-2)
- (modified) llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp (+34-10)
- (modified) llvm/lib/IR/Verifier.cpp (+94-49)
- (modified) llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll (+20)
- (modified) llvm/test/Verifier/tbaa.ll (+5-5)
``````````diff
diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index b25f8eb77ee38b..95db2c4b16eca7 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -59,8 +59,15 @@ class TBAAVerifier {
/// \name Helper functions used by \c visitTBAAMetadata.
/// @{
- MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode,
- APInt &Offset, bool IsNewFormat);
+ std::vector<MDNode *> getFieldNodeFromTBAABaseNode(Instruction &I,
+ const MDNode *BaseNode,
+ APInt &Offset,
+ bool IsNewFormat);
+ bool findAccessTypeNode(Instruction &I,
+ SmallPtrSetImpl<const MDNode *> &StructPath,
+ APInt Offset, bool IsNewFormat,
+ const MDNode *AccessType, const MDNode *BaseNode,
+ const MDNode *MD);
TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I,
const MDNode *BaseNode,
bool IsNewFormat);
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index e4dc1a867f6f0c..d7301a4e396345 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -121,6 +121,7 @@
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
+#include <stack>
using namespace llvm;
@@ -299,9 +300,10 @@ class TBAAStructTypeNode {
return TBAAStructTypeNode(TypeNode);
}
- /// Get this TBAAStructTypeNode's field in the type DAG with
+ /// Get this TBAAStructTypeNode's fields in the type DAG with
/// given offset. Update the offset to be relative to the field type.
- TBAAStructTypeNode getField(uint64_t &Offset) const {
+ /// There could be multiple fields with same offset.
+ std::vector<TBAAStructTypeNode> getField(uint64_t &Offset) const {
bool NewFormat = isNewFormat();
const ArrayRef<MDOperand> Operands = Node->operands();
const unsigned NumOperands = Operands.size();
@@ -309,11 +311,11 @@ class TBAAStructTypeNode {
if (NewFormat) {
// New-format root and scalar type nodes have no fields.
if (NumOperands < 6)
- return TBAAStructTypeNode();
+ return {TBAAStructTypeNode()};
} else {
// Parent can be omitted for the root node.
if (NumOperands < 2)
- return TBAAStructTypeNode();
+ return {TBAAStructTypeNode()};
// Fast path for a scalar type node and a struct type node with a single
// field.
@@ -325,8 +327,8 @@ class TBAAStructTypeNode {
Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
if (!P)
- return TBAAStructTypeNode();
- return TBAAStructTypeNode(P);
+ return {TBAAStructTypeNode()};
+ return {TBAAStructTypeNode(P)};
}
}
@@ -336,6 +338,8 @@ class TBAAStructTypeNode {
unsigned NumOpsPerField = NewFormat ? 3 : 2;
unsigned TheIdx = 0;
+ std::vector<TBAAStructTypeNode> Ret;
+
for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
Idx += NumOpsPerField) {
uint64_t Cur =
@@ -353,10 +357,20 @@ class TBAAStructTypeNode {
uint64_t Cur =
mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
Offset -= Cur;
+
+ // Collect all fields that have right offset.
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
- if (!P)
- return TBAAStructTypeNode();
- return TBAAStructTypeNode(P);
+ Ret.emplace_back(P ? TBAAStructTypeNode(P) : TBAAStructTypeNode());
+
+ while (TheIdx > FirstFieldOpNo) {
+ TheIdx -= NumOpsPerField;
+ auto Val = mdconst::extract<ConstantInt>(Operands[TheIdx + 1]);
+ if (Cur != Val->getZExtValue())
+ break;
+ MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
+ P ? Ret.emplace_back(P) : Ret.emplace_back();
+ }
+ return Ret;
}
};
@@ -599,11 +613,19 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
// from the base type, follow the edge with the correct offset in the type DAG
// and adjust the offset until we reach the field type or until we reach the
// access type.
+ // If multiple fields have same offset in some base type, then scan each such
+ // field.
bool NewFormat = BaseTag.isNewFormat();
TBAAStructTypeNode BaseType(BaseTag.getBaseType());
uint64_t OffsetInBase = BaseTag.getOffset();
+ SmallVector<std::pair<TBAAStructTypeNode, uint64_t>, 4> ToCheck;
+ ToCheck.emplace_back(BaseType, OffsetInBase);
for (;;) {
+ assert(!ToCheck.empty() && "check list should not be empty");
+ std::tie(BaseType, OffsetInBase) = ToCheck.back();
+ ToCheck.pop_back();
+
// In the old format there is no distinction between fields and parent
// types, so in this case we consider all nodes up to the root.
if (!BaseType.getNode()) {
@@ -627,7 +649,9 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
// Follow the edge with the correct offset. Offset will be adjusted to
// be relative to the field type.
- BaseType = BaseType.getField(OffsetInBase);
+ for (auto &&F : BaseType.getField(OffsetInBase)) {
+ ToCheck.emplace_back(F, OffsetInBase);
+ }
}
// If the base object has a direct or indirect field of the subobject's type,
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index aeaca21a99cc5e..d5d26c3399966b 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6934,48 +6934,56 @@ bool TBAAVerifier::isValidScalarTBAANode(const MDNode *MD) {
return Result;
}
-/// Returns the field node at the offset \p Offset in \p BaseNode. Update \p
-/// Offset in place to be the offset within the field node returned.
+/// Returns one or several field nodes at the offset \p Offset in \p BaseNode.
+/// Returns empty vector if \p BaseNode has no fields with specified offset.
+/// Update \p Offset in place to be the offset within the field node returned.
///
/// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode.
-MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I,
- const MDNode *BaseNode,
- APInt &Offset,
- bool IsNewFormat) {
+std::vector<MDNode *> TBAAVerifier::getFieldNodeFromTBAABaseNode(
+ Instruction &I, const MDNode *BaseNode, APInt &Offset, bool IsNewFormat) {
assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!");
// Scalar nodes have only one possible "field" -- their parent in the access
// hierarchy. Offset must be zero at this point, but our caller is supposed
// to check that.
if (BaseNode->getNumOperands() == 2)
- return cast<MDNode>(BaseNode->getOperand(1));
+ return {cast<MDNode>(BaseNode->getOperand(1))};
unsigned FirstFieldOpNo = IsNewFormat ? 3 : 1;
unsigned NumOpsPerField = IsNewFormat ? 3 : 2;
+
+ unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
for (unsigned Idx = FirstFieldOpNo; Idx < BaseNode->getNumOperands();
Idx += NumOpsPerField) {
auto *OffsetEntryCI =
mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1));
if (OffsetEntryCI->getValue().ugt(Offset)) {
if (Idx == FirstFieldOpNo) {
- CheckFailed("Could not find TBAA parent in struct type node", &I,
- BaseNode, &Offset);
- return nullptr;
+ return {};
}
- unsigned PrevIdx = Idx - NumOpsPerField;
- auto *PrevOffsetEntryCI =
- mdconst::extract<ConstantInt>(BaseNode->getOperand(PrevIdx + 1));
- Offset -= PrevOffsetEntryCI->getValue();
- return cast<MDNode>(BaseNode->getOperand(PrevIdx));
+ LastIdx = Idx - NumOpsPerField;
+ break;
}
}
- unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>(
BaseNode->getOperand(LastIdx + 1));
- Offset -= LastOffsetEntryCI->getValue();
- return cast<MDNode>(BaseNode->getOperand(LastIdx));
+ auto LastOffsetVal = LastOffsetEntryCI->getValue();
+ Offset -= LastOffsetVal;
+
+ std::vector<MDNode *> Ret;
+ Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
+ while (LastIdx > FirstFieldOpNo) {
+ LastIdx -= NumOpsPerField;
+ LastOffsetEntryCI =
+ mdconst::extract<ConstantInt>(BaseNode->getOperand(LastIdx + 1));
+ if (LastOffsetEntryCI->getValue() != LastOffsetVal)
+ break;
+ Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
+ }
+
+ return Ret;
}
static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
@@ -7052,47 +7060,84 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
APInt Offset = OffsetCI->getValue();
- bool SeenAccessTypeInPath = false;
- SmallPtrSet<MDNode *, 4> StructPath;
+ SmallPtrSet<const MDNode *, 4> StructPath;
- for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
- BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset,
- IsNewFormat)) {
- if (!StructPath.insert(BaseNode).second) {
- CheckFailed("Cycle detected in struct path", &I, MD);
- return false;
- }
+ auto &&[Invalid, BaseNodeBitWidth] =
+ verifyTBAABaseNode(I, BaseNode, IsNewFormat);
- bool Invalid;
- unsigned BaseNodeBitWidth;
- std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode,
- IsNewFormat);
+ // If the base node is invalid in itself, then we've already printed all the
+ // errors we wanted to print.
+ if (Invalid)
+ return false;
- // If the base node is invalid in itself, then we've already printed all the
- // errors we wanted to print.
- if (Invalid)
- return false;
+ bool SeenAccessTypeInPath = BaseNode == AccessType;
+ if (SeenAccessTypeInPath) {
+ CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access", &I,
+ MD, &Offset);
+ if (IsNewFormat)
+ return true;
+ }
- SeenAccessTypeInPath |= BaseNode == AccessType;
+ CheckTBAA(findAccessTypeNode(I, StructPath, Offset, IsNewFormat, AccessType,
+ BaseNode, MD) ||
+ SeenAccessTypeInPath,
+ "Did not see access type in access path!", &I, MD);
+ return true;
+}
- if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
- CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access",
- &I, MD, &Offset);
+bool TBAAVerifier::findAccessTypeNode(
+ Instruction &I, SmallPtrSetImpl<const MDNode *> &StructPath, APInt Offset,
+ bool IsNewFormat, const MDNode *AccessType, const MDNode *BaseNode,
+ const MDNode *MD) {
+ if (!BaseNode || IsRootTBAANode(BaseNode))
+ return false;
- CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
- (BaseNodeBitWidth == 0 && Offset == 0) ||
- (IsNewFormat && BaseNodeBitWidth == ~0u),
- "Access bit-width not the same as description bit-width", &I, MD,
- BaseNodeBitWidth, Offset.getBitWidth());
+ auto &&[Invalid, BaseNodeBitWidth] =
+ verifyTBAABaseNode(I, BaseNode, IsNewFormat);
- if (IsNewFormat && SeenAccessTypeInPath)
- break;
+ // If the base node is invalid in itself, then we've already printed all the
+ // errors we wanted to print.
+ if (Invalid)
+ return false;
+
+ // Offset at point of scalar access must be zero. Skip mismatched nodes.
+ if ((isValidScalarTBAANode(BaseNode) || BaseNode == AccessType) &&
+ Offset != 0)
+ return false;
+
+ CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
+ (BaseNodeBitWidth == 0 && Offset == 0) ||
+ (IsNewFormat && BaseNodeBitWidth == ~0u),
+ "Access bit-width not the same as description bit-width", &I, MD,
+ BaseNodeBitWidth, Offset.getBitWidth());
+
+ bool SeenAccessTypeInPath = (BaseNode == AccessType && Offset == 0);
+
+ if (IsNewFormat && SeenAccessTypeInPath)
+ return true;
+
+ auto ProbableNodes =
+ getFieldNodeFromTBAABaseNode(I, BaseNode, Offset, IsNewFormat);
+
+ if (!StructPath.insert(BaseNode).second) {
+ CheckFailed("Cycle detected in struct path", &I, MD);
+ return false;
}
- CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
- MD);
- return true;
+ for (auto *PN : ProbableNodes) {
+ if (!PN || IsRootTBAANode(PN))
+ continue;
+
+ SmallPtrSet<const MDNode *, 4> StructPathCopy;
+ StructPathCopy.insert(StructPath.begin(), StructPath.end());
+
+ if (findAccessTypeNode(I, StructPathCopy, Offset, IsNewFormat, AccessType,
+ PN, MD))
+ return true;
+ }
+
+ return SeenAccessTypeInPath;
}
char VerifierLegacyPass::ID = 0;
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll
index 4049c78049e036..422f8d80404687 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll
@@ -105,6 +105,22 @@ entry:
ret i32 %0
}
+; C vs. D => MayAlias.
+define i32 @f7(ptr %c, ptr %d) {
+entry:
+; CHECK-LABEL: f7
+; CHECK: MayAlias: store i16 7, {{.*}} <-> store i32 5,
+; OPT-LABEL: f7
+; OPT: store i32 5,
+; OPT: store i16 7,
+; OPT: load i32
+; OPT: ret i32
+ store i32 5, ptr %c, align 4, !tbaa !18 ; TAG_Union_int
+ store i16 7, ptr %d, align 4, !tbaa !17 ; TAG_Union_short
+ %0 = load i32, ptr %c, align 4, !tbaa !18 ; TAG_Union_int
+ ret i32 %0
+}
+
!0 = !{!"root"}
!1 = !{!0, i64 1, !"char"}
!2 = !{!1, i64 4, !"int"}
@@ -128,3 +144,7 @@ entry:
!14 = !{!4, i64 2, !"D", !11, i64 0, i64 2}
!15 = !{!14, !14, i64 0, i64 2} ; TAG_D
+
+!16 = !{!1, i64 2, !"Union", !11, i64 0, i64 2, !2, i64 0, i64 4}
+!17 = !{!16, !11, i64 0, i64 2} ; TAG_Union_short
+!18 = !{!16, !2, i64 0, i64 4} ; TAG_Union_int
diff --git a/llvm/test/Verifier/tbaa.ll b/llvm/test/Verifier/tbaa.ll
index abaa415aed749b..107192542d55d9 100644
--- a/llvm/test/Verifier/tbaa.ll
+++ b/llvm/test/Verifier/tbaa.ll
@@ -61,15 +61,15 @@ define void @f_1(ptr %ptr) {
; CHECK: Cycle detected in struct path
; CHECK-NEXT: store i32 0, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
-; CHECK: Offset not zero at the point of scalar access
+; CHECK: Did not see access type in access path
+; CHECK-NEXT: store i32 0, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
+
+; CHECK: Did not see access type in access path
; CHECK-NEXT: store i32 1, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
-; CHECK: Offset not zero at the point of scalar access
+; CHECK: Did not see access type in access path
; CHECK-NEXT: store i32 2, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
-; CHECK: Could not find TBAA parent in struct type node
-; CHECK-NEXT: store i32 3, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
-
; CHECK: Did not see access type in access path!
; CHECK-NEXT: store i32 3, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
``````````
</details>
https://github.com/llvm/llvm-project/pull/76356
More information about the llvm-commits
mailing list