[clang] 458b1e9 - [TBAA] Refine pointer-tbaa for void pointers by pointer depth (#126047)

via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 20 04:06:59 PST 2025


Author: Bruno De Fraine
Date: 2025-02-20T16:06:56+04:00
New Revision: 458b1e9d2c61945f62b5bdfda93914d6a46b6cc7

URL: https://github.com/llvm/llvm-project/commit/458b1e9d2c61945f62b5bdfda93914d6a46b6cc7
DIFF: https://github.com/llvm/llvm-project/commit/458b1e9d2c61945f62b5bdfda93914d6a46b6cc7.diff

LOG: [TBAA] Refine pointer-tbaa for void pointers by pointer depth (#126047)

Commit 77d3f8a avoids distinct tags for any pointers where the ultimate
pointee type is `void`, to solve breakage in real-world code that uses
(indirections to) `void*` for polymorphism over different pointer types.

While this matches the TBAA implementation in GCC, this patch implements
a refinement that distinguishes void pointers by pointer depth, as
described in the "strict aliasing" documentation included in the
aforementioned commit:
> `void*` is permitted to alias any pointer type, `void**` is permitted
> to alias any pointer to pointer type, and so on.

For example, `void**` is no longer considered to alias `int*` in this
refinement, but it remains possible to use `void**` for polymorphism
over pointers to pointers.

Added: 
    

Modified: 
    clang/lib/CodeGen/CodeGenTBAA.cpp
    clang/lib/CodeGen/CodeGenTBAA.h
    clang/test/CXX/drs/cwg158.cpp
    clang/test/CodeGen/tbaa-pointers.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 3f1a24791ddd8..818b6dabaa144 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -80,6 +80,42 @@ llvm::MDNode *CodeGenTBAA::getChar() {
   return Char;
 }
 
+llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) {
+  assert(PtrDepth >= 1 && "Pointer must have some depth");
+
+  // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes
+  // for "any" pointers of increasing pointer depth, and are organized in the
+  // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ...
+  //
+  // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1),
+  // since there is no node for pointer depth 0.
+  //
+  // These "any" pointer type nodes are used in pointer TBAA. The type node of
+  // a concrete pointer type has the "any" pointer type node of appropriate
+  // pointer depth as its parent. The "any" pointer type nodes are also used
+  // directly for accesses to void pointers, or to specific pointers that we
+  // conservatively do not distinguish in pointer TBAA (e.g. pointers to
+  // members). Essentially, this establishes that e.g. void** can alias with
+  // any type that can unify with T**, ignoring things like qualifiers. Here, T
+  // is a variable that represents an arbitrary type, including pointer types.
+  // As such, each depth is naturally a subtype of the previous depth, and thus
+  // transitively of all previous depths.
+  if (AnyPtrs.size() < PtrDepth) {
+    AnyPtrs.reserve(PtrDepth);
+    auto Size = Module.getDataLayout().getPointerSize();
+    // Populate first element.
+    if (AnyPtrs.empty())
+      AnyPtrs.push_back(createScalarTypeNode("any pointer", getChar(), Size));
+    // Populate further elements.
+    for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) {
+      auto Name = ("any p" + llvm::Twine(Idx + 1) + " pointer").str();
+      AnyPtrs.push_back(createScalarTypeNode(Name, AnyPtrs[Idx - 1], Size));
+    }
+  }
+
+  return AnyPtrs[PtrDepth - 1];
+}
+
 static bool TypeHasMayAlias(QualType QTy) {
   // Tagged types have declarations, and therefore may have attributes.
   if (auto *TD = QTy->getAsTagDecl())
@@ -202,9 +238,8 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
   // they involve a significant representation 
diff erence.  We don't
   // currently do so, however.
   if (Ty->isPointerType() || Ty->isReferenceType()) {
-    llvm::MDNode *AnyPtr = createScalarTypeNode("any pointer", getChar(), Size);
     if (!CodeGenOpts.PointerTBAA)
-      return AnyPtr;
+      return getAnyPtr();
     // C++ [basic.lval]p11 permits objects to accessed through an l-value of
     // similar type. Two types are similar under C++ [conv.qual]p2 if the
     // decomposition of the types into pointers, member pointers, and arrays has
@@ -232,7 +267,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
     // common idioms and there is no good alternative to re-write the code
     // without strict-aliasing violations.
     if (Ty->isVoidType())
-      return AnyPtr;
+      return getAnyPtr(PtrDepth);
 
     assert(!isa<VariableArrayType>(Ty));
     // When the underlying type is a builtin type, we compute the pointee type
@@ -256,7 +291,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
       // similar-types rule.
       const auto *RT = Ty->getAs<RecordType>();
       if (!RT)
-        return AnyPtr;
+        return getAnyPtr(PtrDepth);
 
       // For unnamed structs or unions C's compatible types rule applies. Two
       // compatible types in 
diff erent compilation units can have 
diff erent
@@ -270,7 +305,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
       // compatibility rule, but it doesn't matter because you can never have a
       // pointer to an anonymous struct or union.
       if (!RT->getDecl()->getDeclName())
-        return AnyPtr;
+        return getAnyPtr(PtrDepth);
 
       // For non-builtin types use the mangled name of the canonical type.
       llvm::raw_svector_ostream TyOut(TyName);
@@ -281,7 +316,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
     OutName += std::to_string(PtrDepth);
     OutName += " ";
     OutName += TyName;
-    return createScalarTypeNode(OutName, AnyPtr, Size);
+    return createScalarTypeNode(OutName, getAnyPtr(PtrDepth), Size);
   }
 
   // Accesses to arrays are accesses to objects of their element types.

diff  --git a/clang/lib/CodeGen/CodeGenTBAA.h b/clang/lib/CodeGen/CodeGenTBAA.h
index ab3b05df7713b..0aae171d168df 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.h
+++ b/clang/lib/CodeGen/CodeGenTBAA.h
@@ -139,6 +139,7 @@ class CodeGenTBAA {
 
   llvm::MDNode *Root;
   llvm::MDNode *Char;
+  llvm::SmallVector<llvm::MDNode *, 4> AnyPtrs;
 
   /// getRoot - This is the mdnode for the root of the metadata type graph
   /// for this translation unit.
@@ -148,6 +149,10 @@ class CodeGenTBAA {
   /// considered to be equivalent to it.
   llvm::MDNode *getChar();
 
+  /// getAnyPtr - This is the mdnode for any pointer type of (at least) the
+  /// given pointer depth.
+  llvm::MDNode *getAnyPtr(unsigned PtrDepth = 1);
+
   /// CollectFields - Collect information about the fields of a type for
   /// !tbaa.struct metadata formation. Return false for an unsupported type.
   bool CollectFields(uint64_t BaseOffset,

diff  --git a/clang/test/CXX/drs/cwg158.cpp b/clang/test/CXX/drs/cwg158.cpp
index 1f5c319e6bd25..a083ded82bb2e 100644
--- a/clang/test/CXX/drs/cwg158.cpp
+++ b/clang/test/CXX/drs/cwg158.cpp
@@ -42,5 +42,6 @@ const int * h(const int * (*p)[10],  int *(*q)[9]) {
 }
 
 // POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0}
-// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYPTR:!.+]], i64 0}
+// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYP2PTR:!.+]], i64 0}
+// POINTER-TBAA: [[ANYP2PTR]] = !{!"any p2 pointer", [[ANYPTR:!.+]],
 // POINTER-TBAA: [[ANYPTR]] = !{!"any pointer"

diff  --git a/clang/test/CodeGen/tbaa-pointers.c b/clang/test/CodeGen/tbaa-pointers.c
index 48adac503357f..9cfaa0a47af6e 100644
--- a/clang/test/CodeGen/tbaa-pointers.c
+++ b/clang/test/CodeGen/tbaa-pointers.c
@@ -208,8 +208,10 @@ int void_ptrs(void **ptr) {
 // COMMON-LABEL: define i32 @void_ptrs(
 // COMMON-SAME: ptr noundef [[PTRA:%.+]])
 // COMMON:        [[PTR_ADDR:%.+]]  = alloca ptr, align 8
-// COMMON-NEXT:   store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]]
-// COMMON-NEXT:   [[L0:%.+]] = load ptr, ptr  [[PTR_ADDR]], align 8, !tbaa  [[ANYPTR]]
+// DISABLE-NEXT:  store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]]
+// DEFAULT-NEXT:  store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2:!.+]]
+// DISABLE-NEXT:  [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]]
+// DEFAULT-NEXT:  [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2]]
 // COMMON-NEXT:   [[L1:%.+]] = load ptr, ptr [[L0]], align 8, !tbaa  [[ANYPTR]]
 // COMMON-NEXT:   [[BOOL:%.+]] = icmp ne ptr [[L1]], null
 // COMMON-NEXT:   [[BOOL_EXT:%.+]] = zext i1 [[BOOL]] to i64
@@ -220,7 +222,8 @@ int void_ptrs(void **ptr) {
 }
 
 // DEFAULT: [[P2INT_0]] = !{[[P2INT:!.+]], [[P2INT]], i64 0}
-// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_POINTER:!.+]], i64 0}
+// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_P2_POINTER:!.+]], i64 0}
+// DEFAULT: [[ANY_P2_POINTER]] = !{!"any p2 pointer", [[ANY_POINTER:!.+]], i64 0}
 // DISABLE: [[ANYPTR]] = !{[[ANY_POINTER:!.+]], [[ANY_POINTER]], i64 0}
 // COMMON: [[ANY_POINTER]] = !{!"any pointer", [[CHAR:!.+]], i64 0}
 // COMMON: [[CHAR]] = !{!"omnipotent char", [[TBAA_ROOT:!.+]], i64 0}
@@ -228,17 +231,19 @@ int void_ptrs(void **ptr) {
 // DEFAULT: [[P1INT_0]] = !{[[P1INT:!.+]], [[P1INT]], i64 0}
 // DEFAULT: [[P1INT]] = !{!"p1 int", [[ANY_POINTER]], i64 0}
 // DEFAULT: [[P3INT_0]] = !{[[P3INT:!.+]], [[P3INT]], i64 0}
-// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_POINTER]], i64 0}
+// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_P3_POINTER:!.+]], i64 0}
+// DEFAULT: [[ANY_P3_POINTER]] = !{!"any p3 pointer", [[ANY_P2_POINTER]], i64 0}
 // DEFAULT: [[P4CHAR_0]] = !{[[P4CHAR:!.+]], [[P4CHAR]], i64 0}
-// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_POINTER]], i64 0}
+// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_P4_POINTER:!.*]], i64 0}
+// DEFAULT: [[ANY_P4_POINTER]] = !{!"any p4 pointer", [[ANY_P3_POINTER]], i64 0}
 // DEFAULT: [[P3CHAR_0]] = !{[[P3CHAR:!.+]], [[P3CHAR]], i64 0}
-// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_POINTER]], i64 0}
+// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_P3_POINTER]], i64 0}
 // DEFAULT: [[P2CHAR_0]] = !{[[P2CHAR:!.+]], [[P2CHAR]], i64 0}
-// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_POINTER]], i64 0}
+// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_P2_POINTER]], i64 0}
 // DEFAULT: [[P1CHAR_0]] = !{[[P1CHAR:!.+]], [[P1CHAR]], i64 0}
 // DEFAULT: [[P1CHAR]] = !{!"p1 omnipotent char", [[ANY_POINTER]], i64 0}
 // DEFAULT: [[P2S1_TAG]] = !{[[P2S1:!.+]], [[P2S1]], i64 0}
-// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_POINTER]], i64 0}
+// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_P2_POINTER]], i64 0}
 // DEFAULT: [[P1S1_TAG:!.+]] = !{[[P1S1:!.+]], [[P1S1]], i64 0}
 // DEFAULT: [[P1S1]] = !{!"p1 _ZTS2S1", [[ANY_POINTER]], i64 0}
 // DEFAULT: [[P1S2_TAG]] = !{[[P1S2:!.+]], [[P1S2]], i64 0}
@@ -251,3 +256,4 @@ int void_ptrs(void **ptr) {
 // COMMON:  [[INT_TAG]] = !{[[INT_TY:!.+]], [[INT_TY]], i64 0}
 // COMMON:  [[INT_TY]] = !{!"int", [[CHAR]], i64 0}
 // DEFAULT: [[ANYPTR]] = !{[[ANY_POINTER]],  [[ANY_POINTER]], i64 0}
+// DEFAULT: [[ANYP2]] = !{[[ANY_P2_POINTER]],  [[ANY_P2_POINTER]], i64 0}


        


More information about the cfe-commits mailing list