[clang] [compiler-rt] [llvm] [TySan] User-friendly (C style) pointer type names for error reports (PR #166381)
Benjamin Stott via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 03:31:39 PST 2025
https://github.com/BStott6 updated https://github.com/llvm/llvm-project/pull/166381
>From 4481075fce712a3f55493264c11fba6cd4015a4b Mon Sep 17 00:00:00 2001
From: BStott <Benjamin.Stott at sony.com>
Date: Tue, 4 Nov 2025 15:04:29 +0000
Subject: [PATCH 1/4] [TySan] User-friendly (C style) pointer type names for
error reports
---
clang/docs/TypeSanitizer.rst | 2 -
compiler-rt/test/tysan/print_stacktrace.c | 2 +-
compiler-rt/test/tysan/ptr-float.c | 2 +-
.../Instrumentation/TypeSanitizer.cpp | 40 ++++++++++++++++++-
4 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/clang/docs/TypeSanitizer.rst b/clang/docs/TypeSanitizer.rst
index 3c683a6c24bb4..c2f628cb231db 100644
--- a/clang/docs/TypeSanitizer.rst
+++ b/clang/docs/TypeSanitizer.rst
@@ -119,8 +119,6 @@ brief dictionary of these terms.
* ``omnipotent char``: This is a special type which can alias with anything. Its name comes from the C/C++
type ``char``.
-* ``type p[x]``: This signifies pointers to the type. ``x`` is the number of indirections to reach the final value.
- As an example, a pointer to a pointer to an integer would be ``type p2 int``.
TypeSanitizer is still experimental. User-facing error messages should be improved in the future to remove
references to LLVM IR specific terms.
diff --git a/compiler-rt/test/tysan/print_stacktrace.c b/compiler-rt/test/tysan/print_stacktrace.c
index 3ffb6063377d9..831be5e4afed9 100644
--- a/compiler-rt/test/tysan/print_stacktrace.c
+++ b/compiler-rt/test/tysan/print_stacktrace.c
@@ -10,7 +10,7 @@ void zero_array() {
for (i = 0; i < 1; ++i)
P[i] = 0.0f;
// CHECK: ERROR: TypeSanitizer: type-aliasing-violation
- // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type p1 float
+ // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type float*
// CHECK: {{#0 0x.* in zero_array .*print_stacktrace.c:}}[[@LINE-3]]
// CHECK-SHORT-NOT: {{#1 0x.* in main .*print_stacktrace.c}}
// CHECK-LONG-NEXT: {{#1 0x.* in main .*print_stacktrace.c}}
diff --git a/compiler-rt/test/tysan/ptr-float.c b/compiler-rt/test/tysan/ptr-float.c
index aaa9895986988..145d5d8f289ea 100644
--- a/compiler-rt/test/tysan/ptr-float.c
+++ b/compiler-rt/test/tysan/ptr-float.c
@@ -7,7 +7,7 @@ void zero_array() {
for (i = 0; i < 1; ++i)
P[i] = 0.0f;
// CHECK: ERROR: TypeSanitizer: type-aliasing-violation
- // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type p1 float
+ // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type float*
// CHECK: {{#0 0x.* in zero_array .*ptr-float.c:}}[[@LINE-3]]
}
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 87eba5f2c5242..e5109c047584e 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -70,6 +70,12 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation(
"function calls. This verifies that they behave the same."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClUseTBAATypeNames(
+ "tysan-use-tbaa-type-names",
+ cl::desc("Print TBAA-style type names for pointers rather than C-style "
+ "names (e.g. 'p2 int' rather than 'int**')"),
+ cl::Hidden, cl::init(false));
+
STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
namespace {
@@ -260,6 +266,29 @@ static std::string encodeName(StringRef Name) {
return Output;
}
+/// Converts pointer type names from TBAA "p2 int" style to C style ("int**").
+/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly name for this type.
+/// If the type name was changed, returns true and stores the new type name in `Dest`.
+/// Otherwise, returns false (`Dest` is unchanged).
+static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string &Dest) {
+ if (!TypeName.consume_front("p"))
+ return false;
+
+ int Indirection;
+ if (TypeName.consumeInteger(10, Indirection))
+ return false;
+
+ if (!TypeName.consume_front(" "))
+ return false;
+
+ Dest.clear();
+ Dest.reserve(TypeName.size() + Indirection); // One * per indirection
+ Dest.append(TypeName);
+ Dest.append(Indirection, '*');
+
+ return true;
+}
+
std::string
TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
TypeNameMapTy &TypeNames) {
@@ -355,7 +384,16 @@ bool TypeSanitizer::generateBaseTypeDescriptor(
// [2, member count, [type pointer, offset]..., name]
LLVMContext &C = MD->getContext();
- Constant *NameData = ConstantDataArray::getString(C, NameNode->getString());
+ StringRef TypeName = NameNode->getString();
+
+ // Convert LLVM-internal TBAA-style type names to C-style type names
+ // (more user-friendly)
+ std::string CStyleTypeName;
+ if (!ClUseTBAATypeNames)
+ if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
+ TypeName = CStyleTypeName;
+
+ Constant *NameData = ConstantDataArray::getString(C, TypeName);
SmallVector<Type *> TDSubTys;
SmallVector<Constant *> TDSubData;
>From a71d469a233dffbac5fb2935bbecbc07416597bd Mon Sep 17 00:00:00 2001
From: BStott <Benjamin.Stott at sony.com>
Date: Tue, 4 Nov 2025 17:21:49 +0000
Subject: [PATCH 2/4] Fix failing test
---
clang/test/CodeGen/sanitize-type-globals.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/sanitize-type-globals.cpp b/clang/test/CodeGen/sanitize-type-globals.cpp
index 1154ab4ca5df2..1300396795ff3 100644
--- a/clang/test/CodeGen/sanitize-type-globals.cpp
+++ b/clang/test/CodeGen/sanitize-type-globals.cpp
@@ -13,7 +13,7 @@
// CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat
// CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat
// CHECK: @__tysan_v1_any_20pointer = linkonce_odr constant { i64, i64, ptr, i64, [12 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [12 x i8] c"any pointer\00" }, comdat
-// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [7 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [7 x i8] c"p1 int\00" }, comdat
+// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [5 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [5 x i8] c"int*\00" }, comdat
// CHECK: @__tysan_v1___ZTS9CompleteS = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [15 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_p1_20int, i64 8, [15 x i8] c"_ZTS9CompleteS\00" }, comdat
// CHECK: @__tysan_v1___ZTS1b = linkonce_odr constant { i64, i64, [7 x i8] } { i64 2, i64 0, [7 x i8] c"_ZTS1b\00" }, comdat
// CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_any_20pointer, ptr @__tysan_v1_p1_20int, ptr @__tysan_v1___ZTS9CompleteS, ptr @__tysan_v1___ZTS1b], section "llvm.metadata"
>From ac99a5be6be662c51d3c838774c76c28e5382bc7 Mon Sep 17 00:00:00 2001
From: BStott <Benjamin.Stott at sony.com>
Date: Thu, 6 Nov 2025 13:56:29 +0000
Subject: [PATCH 3/4] Remove command line flag, fix formatting
---
.../Instrumentation/TypeSanitizer.cpp | 20 +++++++------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index e5109c047584e..ab59c3e9de151 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -70,12 +70,6 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation(
"function calls. This verifies that they behave the same."),
cl::Hidden, cl::init(false));
-static cl::opt<bool> ClUseTBAATypeNames(
- "tysan-use-tbaa-type-names",
- cl::desc("Print TBAA-style type names for pointers rather than C-style "
- "names (e.g. 'p2 int' rather than 'int**')"),
- cl::Hidden, cl::init(false));
-
STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
namespace {
@@ -267,10 +261,11 @@ static std::string encodeName(StringRef Name) {
}
/// Converts pointer type names from TBAA "p2 int" style to C style ("int**").
-/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly name for this type.
-/// If the type name was changed, returns true and stores the new type name in `Dest`.
-/// Otherwise, returns false (`Dest` is unchanged).
-static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string &Dest) {
+/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly
+/// name for this type. If the type name was changed, returns true and stores
+/// the new type name in `Dest`. Otherwise, returns false (`Dest` is unchanged).
+static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName,
+ std::string &Dest) {
if (!TypeName.consume_front("p"))
return false;
@@ -389,9 +384,8 @@ bool TypeSanitizer::generateBaseTypeDescriptor(
// Convert LLVM-internal TBAA-style type names to C-style type names
// (more user-friendly)
std::string CStyleTypeName;
- if (!ClUseTBAATypeNames)
- if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
- TypeName = CStyleTypeName;
+ if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
+ TypeName = CStyleTypeName;
Constant *NameData = ConstantDataArray::getString(C, TypeName);
SmallVector<Type *> TDSubTys;
>From 62d29b91c330606b4ce603dc177fe37098448e24 Mon Sep 17 00:00:00 2001
From: BStott <Benjamin.Stott at sony.com>
Date: Thu, 20 Nov 2025 11:31:16 +0000
Subject: [PATCH 4/4] Rework pointer typename rewriting to occur in runtime
rather than instrumentation, fix demangling for pointer names
---
compiler-rt/lib/tysan/tysan.cpp | 54 ++++++++++++++++---
.../Instrumentation/TypeSanitizer.cpp | 34 +-----------
2 files changed, 49 insertions(+), 39 deletions(-)
diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp
index 1c67adeba0fc5..76fa8f45ebe4f 100644
--- a/compiler-rt/lib/tysan/tysan.cpp
+++ b/compiler-rt/lib/tysan/tysan.cpp
@@ -22,6 +22,7 @@
#include "tysan/tysan.h"
+#include <ctype.h>
#include <stdint.h>
#include <string.h>
@@ -40,20 +41,62 @@ tysan_copy_types(const void *daddr, const void *saddr, uptr size) {
internal_memmove(shadow_for(daddr), shadow_for(saddr), size * sizeof(uptr));
}
-static const char *getDisplayName(const char *Name) {
+/// Struct returned by `parseIndirectionPrefix`.
+struct ParseIndirectionPrefixResult {
+ /// Level of indirection - 0 if the prefix is not found.
+ size_t Indirection;
+ /// Pointer to the remaining part of the name after the indirection prefix.
+ /// (This is the original pointer if the prefix is not found.)
+ const char *RemainingName;
+};
+
+/// Parses the "p{indirection} " prefix given to pointer type names in TBAA.
+static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) {
+ size_t CharIndex = 0;
+
+ // Parse 'p'.
+ // This also handles the case of an empty string.
+ if (Name[CharIndex++] != 'p')
+ return {0, Name};
+
+ // Parse indirection level.
+ size_t Indirection = 0;
+ while (isdigit(Name[CharIndex])) {
+ const auto DigitValue = static_cast<size_t>(Name[CharIndex] - '0');
+ Indirection = Indirection * 10 + DigitValue;
+ ++CharIndex;
+ }
+
+ // Parse space.
+ if (Name[CharIndex++] != ' ')
+ return {0, Name};
+
+ return {Indirection, Name + CharIndex};
+}
+
+static void printDisplayName(const char *Name) {
if (Name[0] == '\0')
- return "<anonymous type>";
+ Printf("<anonymous type>");
+
+ // Parse indirection prefix and remove it.
+ const auto [Indirection, RemainingName] = parseIndirectionPrefix(Name);
// Clang generates tags for C++ types that demangle as typeinfo. Remove the
// prefix from the generated string.
const char *TIPrefix = "typeinfo name for ";
size_t TIPrefixLen = strlen(TIPrefix);
- const char *DName = Symbolizer::GetOrInit()->Demangle(Name);
+ const char *DName = Symbolizer::GetOrInit()->Demangle(RemainingName);
if (!internal_strncmp(DName, TIPrefix, TIPrefixLen))
DName += TIPrefixLen;
- return DName;
+ // Print type name.
+ Printf("%s", DName);
+
+ // Print asterisks for indirection (C pointer notation).
+ for (size_t i = 0; i < Indirection; ++i) {
+ Printf("*");
+ }
}
static void printTDName(tysan_type_descriptor *td) {
@@ -75,8 +118,7 @@ static void printTDName(tysan_type_descriptor *td) {
}
break;
case TYSAN_STRUCT_TD:
- Printf("%s", getDisplayName(
- (char *)(td->Struct.Members + td->Struct.MemberCount)));
+ printDisplayName((char *)(td->Struct.Members + td->Struct.MemberCount));
break;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index ab59c3e9de151..87eba5f2c5242 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -260,30 +260,6 @@ static std::string encodeName(StringRef Name) {
return Output;
}
-/// Converts pointer type names from TBAA "p2 int" style to C style ("int**").
-/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly
-/// name for this type. If the type name was changed, returns true and stores
-/// the new type name in `Dest`. Otherwise, returns false (`Dest` is unchanged).
-static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName,
- std::string &Dest) {
- if (!TypeName.consume_front("p"))
- return false;
-
- int Indirection;
- if (TypeName.consumeInteger(10, Indirection))
- return false;
-
- if (!TypeName.consume_front(" "))
- return false;
-
- Dest.clear();
- Dest.reserve(TypeName.size() + Indirection); // One * per indirection
- Dest.append(TypeName);
- Dest.append(Indirection, '*');
-
- return true;
-}
-
std::string
TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
TypeNameMapTy &TypeNames) {
@@ -379,15 +355,7 @@ bool TypeSanitizer::generateBaseTypeDescriptor(
// [2, member count, [type pointer, offset]..., name]
LLVMContext &C = MD->getContext();
- StringRef TypeName = NameNode->getString();
-
- // Convert LLVM-internal TBAA-style type names to C-style type names
- // (more user-friendly)
- std::string CStyleTypeName;
- if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
- TypeName = CStyleTypeName;
-
- Constant *NameData = ConstantDataArray::getString(C, TypeName);
+ Constant *NameData = ConstantDataArray::getString(C, NameNode->getString());
SmallVector<Type *> TDSubTys;
SmallVector<Constant *> TDSubData;
More information about the llvm-commits
mailing list