[clang] [TBAA] Only emit pointer tbaa metadata for record types. (PR #116991)

Florian Hahn via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 21 10:48:05 PST 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/116991

>From 3a8c157ca484db54a3e1d1ff8061d7b76ce46834 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 20 Nov 2024 15:31:54 +0000
Subject: [PATCH 1/5] [TBAA] Add run-line with -fpointer-tbaa to cwg158

---
 clang/test/CXX/drs/cwg158.cpp | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/clang/test/CXX/drs/cwg158.cpp b/clang/test/CXX/drs/cwg158.cpp
index 9301c790297e9d..6e18156d9ccc6f 100644
--- a/clang/test/CXX/drs/cwg158.cpp
+++ b/clang/test/CXX/drs/cwg158.cpp
@@ -1,12 +1,14 @@
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++98 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++11 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++14 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++98 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++11 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++14 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -pointer-tbaa -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,POINTER-TBAA %s
 
 // cwg158: yes
 
 // CHECK-LABEL: define {{.*}} @_Z1f
 const int *f(const int * const *p, int **q) {
+  // CHECK: load ptr, ptr %p.addr
   // CHECK: load ptr, {{.*}}, !tbaa ![[INTPTR_TBAA:[^,]*]]
   const int *x = *p;
   // CHECK: store ptr null, {{.*}}, !tbaa ![[INTPTR_TBAA]]
@@ -18,10 +20,24 @@ struct A {};
 
 // CHECK-LABEL: define {{.*}} @_Z1g
 const int *(A::*const *g(const int *(A::* const **p)[3], int *(A::***q)[3]))[3] {
+  // CHECK: load ptr, ptr %p.addr
   // CHECK: load ptr, {{.*}}, !tbaa ![[MEMPTR_TBAA:[^,]*]]
   const int *(A::*const *x)[3] = *p;
-  // CHECK: store ptr null, {{.*}}, !tbaa ![[MEMPTR_TBAA]]
+  // DEFAULT: store ptr null, {{.*}}, !tbaa ![[MEMPTR_TBAA]]
+  // POINTER-TBAA-NOT: store ptr null, {{.*}}, !tbaa ![[MEMPTR_TBAA]]
   *q = 0;
   return x;
 }
 
+// CHECK-LABEL: define {{.*}} @_Z1h
+const int * h(const int * (*p)[10],  int *(*q)[9]) {
+  // CHECK:  load ptr, ptr %p.addr, align 8, !tbaa [[PTRARRAY_TBAA:!.+]]
+  const int * x = *p[0];
+
+  // CHECK: load ptr, ptr %q.addr, align 8, !tbaa [[PTRARRAY_TBAA]]
+  *q[0] = 0;
+  return x;
+}
+
+// POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0}
+// POINTER-TBAA: [[PTRARRAY_TY:!.+]] = !{!"p1 _ZTSPi", !4, i64 0}

>From 776f4279467f479ff3ee5b89c00b5f8e210e987a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 20 Nov 2024 15:37:37 +0000
Subject: [PATCH 2/5] [TBAA] Only emit pointer tbaa metadata for record types.

Be conservative if the type isn't a record type. Handling other types may
require stripping const-qualifiers inside the type, e.g. MemberPointerType.

Without this, we assign different tags to the accesses for p and q in the
second test in cwg158.
---
 clang/lib/CodeGen/CodeGenTBAA.cpp |  6 ++++++
 clang/test/CXX/drs/cwg158.cpp     | 13 ++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index c31579e8323174..73aae6b0db8d90 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -230,6 +230,12 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
               ->getString();
       TyName = Name;
     } else {
+      // Be conservative if the type isn't a record type. Handling other types
+      // may require stripping const-qualifiers inside the type, e.g.
+      // MemberPointerType.
+      if (!Ty->isRecordType())
+        return AnyPtr;
+
       // For non-builtin types use the mangled name of the canonical type.
       llvm::raw_svector_ostream TyOut(TyName);
       MangleCtx->mangleCanonicalTypeName(QualType(Ty, 0), TyOut);
diff --git a/clang/test/CXX/drs/cwg158.cpp b/clang/test/CXX/drs/cwg158.cpp
index 6e18156d9ccc6f..cee0b506509015 100644
--- a/clang/test/CXX/drs/cwg158.cpp
+++ b/clang/test/CXX/drs/cwg158.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++98 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++11 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++14 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,DEFAULT %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -pointer-tbaa -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,POINTER-TBAA %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++98 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++11 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++14 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -pointer-tbaa -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
 
 // cwg158: yes
 
@@ -23,8 +23,7 @@ const int *(A::*const *g(const int *(A::* const **p)[3], int *(A::***q)[3]))[3]
   // CHECK: load ptr, ptr %p.addr
   // CHECK: load ptr, {{.*}}, !tbaa ![[MEMPTR_TBAA:[^,]*]]
   const int *(A::*const *x)[3] = *p;
-  // DEFAULT: store ptr null, {{.*}}, !tbaa ![[MEMPTR_TBAA]]
-  // POINTER-TBAA-NOT: store ptr null, {{.*}}, !tbaa ![[MEMPTR_TBAA]]
+  // CHECK: store ptr null, {{.*}}, !tbaa ![[MEMPTR_TBAA]]
   *q = 0;
   return x;
 }

>From e0076d68d416357f66a9dc25c1452de30e6ff76a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 20 Nov 2024 22:41:15 +0000
Subject: [PATCH 3/5] !fixup loop through array types

---
 clang/lib/CodeGen/CodeGenTBAA.cpp | 15 ++++++++-------
 clang/test/CXX/drs/cwg158.cpp     |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 73aae6b0db8d90..e40addbe2bea7e 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -206,12 +206,14 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
     if (!CodeGenOpts.PointerTBAA)
       return AnyPtr;
     // Compute the depth of the pointer and generate a tag of the form "p<depth>
-    // <base type tag>".
+    // <base type tag>". Look through pointer and array types to determine the
+    // base type.
     unsigned PtrDepth = 0;
     do {
       PtrDepth++;
-      Ty = Ty->getPointeeType().getTypePtr();
-    } while (Ty->isPointerType());
+      Ty = Ty->isPointerType() ? Ty->getPointeeType().getTypePtr()
+                               : Ty->getArrayElementTypeNoTypeQual();
+    } while (Ty->isPointerType() || Ty->isArrayType());
     Ty = Context.getBaseElementType(QualType(Ty, 0)).getTypePtr();
     assert(!isa<VariableArrayType>(Ty));
     // When the underlying type is a builtin type, we compute the pointee type
@@ -230,10 +232,9 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
               ->getString();
       TyName = Name;
     } else {
-      // Be conservative if the type isn't a record type. Handling other types
-      // may require stripping const-qualifiers inside the type, e.g.
-      // MemberPointerType.
-      if (!Ty->isRecordType())
+      // Be conservative if the type a MemberPointerType. Those would require
+      // stripping const-qualifiers inside the type.
+      if (Ty->isMemberPointerType())
         return AnyPtr;
 
       // For non-builtin types use the mangled name of the canonical type.
diff --git a/clang/test/CXX/drs/cwg158.cpp b/clang/test/CXX/drs/cwg158.cpp
index cee0b506509015..a990aea2db2ecc 100644
--- a/clang/test/CXX/drs/cwg158.cpp
+++ b/clang/test/CXX/drs/cwg158.cpp
@@ -39,4 +39,4 @@ const int * h(const int * (*p)[10],  int *(*q)[9]) {
 }
 
 // POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0}
-// POINTER-TBAA: [[PTRARRAY_TY:!.+]] = !{!"p1 _ZTSPi", !4, i64 0}
+// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p3 int", !4, i64 0}

>From 65755fed015b9a58137f300aee040d4f1a3869f1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 21 Nov 2024 14:11:03 +0000
Subject: [PATCH 4/5] !fixup address comments

---
 clang/lib/CodeGen/CodeGenTBAA.cpp | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index e40addbe2bea7e..72e79bf94a05c5 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -205,16 +205,27 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
     llvm::MDNode *AnyPtr = createScalarTypeNode("any pointer", getChar(), Size);
     if (!CodeGenOpts.PointerTBAA)
       return AnyPtr;
-    // Compute the depth of the pointer and generate a tag of the form "p<depth>
-    // <base type tag>". Look through pointer and array types to determine the
-    // base type.
+    // C++ [basic.lval]p11 permits objects to be accessed through an l-value of
+    // similar type. Two types are similar under C++ [conv.qual]p2 if the
+    // decomposition of the types into pointers, member pointers, and arrays has
+    // the same structure when ignoring cv-qualifiers at each level of the
+    // decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which
+    // would really complicate any attempt to distinguish pointers to arrays by
+    // their bounds. It's simpler, and much easier to explain to users, to
+    // simply treat all pointers to arrays as pointers to their element type for
+    // aliasing purposes. So when creating a TBAA tag for a pointer type, we
+    // recursively ignore both qualifiers and array types when decomposing the
+    // pointee type. The only meaningful remaining structure is the number of
+    // pointer types we encountered along the way, so we just produce the tag
+    // "p<depth> <base type tag>". If we do find a member pointer type, for now
+    // we just conservatively bail out with AnyPtr (below) rather than trying to
+    // create a tag that honors the similar-type rules while still
+    // distinguishing different kinds of member pointer.
     unsigned PtrDepth = 0;
     do {
       PtrDepth++;
-      Ty = Ty->isPointerType() ? Ty->getPointeeType().getTypePtr()
-                               : Ty->getArrayElementTypeNoTypeQual();
+      Ty = Ty->getPointeeType()->getBaseElementTypeUnsafe();
     } while (Ty->isPointerType() || Ty->isArrayType());
-    Ty = Context.getBaseElementType(QualType(Ty, 0)).getTypePtr();
     assert(!isa<VariableArrayType>(Ty));
     // When the underlying type is a builtin type, we compute the pointee type
     // string recursively, which is implicitly more forgiving than the standards
@@ -232,9 +243,10 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
               ->getString();
       TyName = Name;
     } else {
-      // Be conservative if the type a MemberPointerType. Those would require
-      // stripping const-qualifiers inside the type.
-      if (Ty->isMemberPointerType())
+      // Be conservative if the type isn't a RecordType. We are  specifically
+      // required to do this for member pointers until we implement the
+      // similar-types rule.
+      if (!Ty->isRecordType())
         return AnyPtr;
 
       // For non-builtin types use the mangled name of the canonical type.

>From 2372e815a1e1d453b6435dbc0a40d40dc6491c78 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 21 Nov 2024 18:47:19 +0000
Subject: [PATCH 5/5] !fixup address latest comments, thanks!

---
 clang/lib/CodeGen/CodeGenTBAA.cpp | 4 ++--
 clang/test/CXX/drs/cwg158.cpp     | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 72e79bf94a05c5..4bcb541156bd23 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -225,7 +225,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
     do {
       PtrDepth++;
       Ty = Ty->getPointeeType()->getBaseElementTypeUnsafe();
-    } while (Ty->isPointerType() || Ty->isArrayType());
+    } while (Ty->isPointerType());
     assert(!isa<VariableArrayType>(Ty));
     // When the underlying type is a builtin type, we compute the pointee type
     // string recursively, which is implicitly more forgiving than the standards
@@ -243,7 +243,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
               ->getString();
       TyName = Name;
     } else {
-      // Be conservative if the type isn't a RecordType. We are  specifically
+      // Be conservative if the type isn't a RecordType. We are specifically
       // required to do this for member pointers until we implement the
       // similar-types rule.
       if (!Ty->isRecordType())
diff --git a/clang/test/CXX/drs/cwg158.cpp b/clang/test/CXX/drs/cwg158.cpp
index a990aea2db2ecc..2a744382647773 100644
--- a/clang/test/CXX/drs/cwg158.cpp
+++ b/clang/test/CXX/drs/cwg158.cpp
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 -triple x86_64-linux -std=c++11 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
 // RUN: %clang_cc1 -triple x86_64-linux -std=c++14 %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
 // RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
-// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -pointer-tbaa -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-linux -std=c++1z %s -O3 -pointer-tbaa -disable-llvm-passes -pedantic-errors -emit-llvm -o - | FileCheck --check-prefixes=CHECK,POINTER-TBAA %s
 
 // cwg158: yes
 
@@ -39,4 +39,5 @@ const int * h(const int * (*p)[10],  int *(*q)[9]) {
 }
 
 // POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0}
-// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p3 int", !4, i64 0}
+// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYPTR:!.+]], i64 0}
+// POINTER-TBAA: [[ANYPTR]] = !{!"any pointer"



More information about the cfe-commits mailing list