[llvm-branch-commits] [clang] [AllocToken, Clang] Implement TypeHashPointerSplit mode (PR #156840)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Sep 9 06:03:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Marco Elver (melver)
<details>
<summary>Changes</summary>
Implement the TypeHashPointerSplit mode: This mode assigns a token ID
based on the hash of the allocated type's name, where the top half
ID-space is reserved for types that contain pointers and the bottom half
for types that do not contain pointers.
This mode with max tokens of 2 (`-falloc-token-max=2`) may also
be valuable for heap hardening strategies that simply separate pointer
types from non-pointer types.
Make it the new default mode.
Link: https://discourse.llvm.org/t/rfc-a-framework-for-allocator-partitioning-hints/87434
---
This change is part of the following series:
1. https://github.com/llvm/llvm-project/pull/156838
2. https://github.com/llvm/llvm-project/pull/156839
3. https://github.com/llvm/llvm-project/pull/156840
4. https://github.com/llvm/llvm-project/pull/156841
5. https://github.com/llvm/llvm-project/pull/156842
---
Full diff: https://github.com/llvm/llvm-project/pull/156840.diff
7 Files Affected:
- (modified) clang/docs/AllocToken.rst (+7-2)
- (modified) clang/lib/CodeGen/CGExpr.cpp (+64-3)
- (added) clang/test/CodeGenCXX/alloc-token-pointer.cpp (+177)
- (modified) llvm/lib/Transforms/Instrumentation/AllocToken.cpp (+51-6)
- (modified) llvm/test/Instrumentation/AllocToken/extralibfuncs.ll (+1-1)
- (modified) llvm/test/Instrumentation/AllocToken/nonlibcalls.ll (+1-1)
- (modified) llvm/test/Instrumentation/AllocToken/remark.ll (+1-1)
``````````diff
diff --git a/clang/docs/AllocToken.rst b/clang/docs/AllocToken.rst
index a7bb8877f371b..fb354d6738ea3 100644
--- a/clang/docs/AllocToken.rst
+++ b/clang/docs/AllocToken.rst
@@ -31,13 +31,18 @@ Token Assignment Mode
The default mode to calculate tokens is:
-* *TypeHash* (mode=2): This mode assigns a token ID based on the hash of
- the allocated type's name.
+* *TypeHashPointerSplit* (mode=3): This mode assigns a token ID based on
+ the hash of the allocated type's name, where the top half ID-space is
+ reserved for types that contain pointers and the bottom half for types that
+ do not contain pointers.
Other token ID assignment modes are supported, but they may be subject to
change or removal. These may (experimentally) be selected with ``-mllvm
-alloc-token-mode=<mode>``:
+* *TypeHash* (mode=2): This mode assigns a token ID based on the hash of
+ the allocated type's name.
+
* *Random* (mode=1): This mode assigns a statically-determined random token ID
to each allocation site.
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 14e4d648428ef..e7a0e7696e204 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1277,14 +1277,75 @@ void CodeGenFunction::EmitAllocTokenHint(llvm::CallBase *CB,
assert(SanOpts.has(SanitizerKind::AllocToken) &&
"Only needed with -fsanitize=alloc-token");
+ llvm::MDBuilder MDB(getLLVMContext());
+
+ // Get unique type name.
PrintingPolicy Policy(CGM.getContext().getLangOpts());
Policy.SuppressTagKeyword = true;
Policy.FullyQualifiedName = true;
std::string TypeName = AllocType.getCanonicalType().getAsString(Policy);
- auto *TypeMDS = llvm::MDString::get(CGM.getLLVMContext(), TypeName);
+ auto *TypeNameMD = MDB.createString(TypeName);
+
+ // Check if QualType contains a pointer. Implements a simple DFS to
+ // recursively check if a type contains a pointer type.
+ llvm::SmallPtrSet<const RecordDecl *, 4> VisitedRD;
+ bool IncompleteType = false;
+ auto TypeContainsPtr = [&](auto &&self, QualType T) -> bool {
+ QualType CanonicalType = T.getCanonicalType();
+ if (CanonicalType->isPointerType())
+ return true; // base case
+
+ // Look through typedef chain to check for special types.
+ for (QualType CurrentT = T; const auto *TT = CurrentT->getAs<TypedefType>();
+ CurrentT = TT->getDecl()->getUnderlyingType()) {
+ const IdentifierInfo *II = TT->getDecl()->getIdentifier();
+ if (!II)
+ continue;
+ // Special Case: Syntactically uintptr_t is not a pointer; semantically,
+ // however, very likely used as such. Therefore, classify uintptr_t as a
+ // pointer, too.
+ if (II->isStr("uintptr_t"))
+ return true;
+ }
+
+ // The type is an array; check the element type.
+ if (const ArrayType *AT = CanonicalType->getAsArrayTypeUnsafe())
+ return self(self, AT->getElementType());
+ // The type is a struct, class, or union.
+ if (const RecordDecl *RD = CanonicalType->getAsRecordDecl()) {
+ if (!RD->isCompleteDefinition()) {
+ IncompleteType = true;
+ return false;
+ }
+ if (!VisitedRD.insert(RD).second)
+ return false; // already visited
+ // Check all fields.
+ for (const FieldDecl *Field : RD->fields()) {
+ if (self(self, Field->getType()))
+ return true;
+ }
+ // For C++ classes, also check base classes.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ // Polymorphic types require a vptr.
+ if (CXXRD->isPolymorphic())
+ return true;
+ for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
+ if (self(self, Base.getType()))
+ return true;
+ }
+ }
+ }
+ return false;
+ };
+ const bool ContainsPtr = TypeContainsPtr(TypeContainsPtr, AllocType);
+ if (!ContainsPtr && IncompleteType)
+ return;
+ auto *ContainsPtrC = Builder.getInt1(ContainsPtr);
+ auto *ContainsPtrMD = MDB.createConstant(ContainsPtrC);
- // Format: !{<type-name>}
- auto *MDN = llvm::MDNode::get(CGM.getLLVMContext(), {TypeMDS});
+ // Format: !{<type-name>, <contains-pointer>}
+ auto *MDN =
+ llvm::MDNode::get(CGM.getLLVMContext(), {TypeNameMD, ContainsPtrMD});
CB->setMetadata(llvm::LLVMContext::MD_alloc_token_hint, MDN);
}
diff --git a/clang/test/CodeGenCXX/alloc-token-pointer.cpp b/clang/test/CodeGenCXX/alloc-token-pointer.cpp
new file mode 100644
index 0000000000000..7adb75c7afebb
--- /dev/null
+++ b/clang/test/CodeGenCXX/alloc-token-pointer.cpp
@@ -0,0 +1,177 @@
+// Check -fsanitize=alloc-token TypeHashPointerSplit mode with only 2
+// tokens so we effectively only test the contains-pointer logic.
+//
+// RUN: %clang_cc1 -fsanitize=alloc-token -falloc-token-max=2 -triple x86_64-linux-gnu -std=c++20 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O -fsanitize=alloc-token -falloc-token-max=2 -triple x86_64-linux-gnu -std=c++20 -emit-llvm %s -o - | FileCheck %s
+
+#include "../Analysis/Inputs/system-header-simulator-cxx.h"
+
+typedef __UINTPTR_TYPE__ uintptr_t;
+
+extern "C" {
+void *malloc(size_t size);
+}
+
+// CHECK-LABEL: @_Z15test_malloc_intv(
+void *test_malloc_int() {
+ // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 4, i64 0)
+ int *a = (int *)malloc(sizeof(int));
+ *a = 42;
+ return a;
+}
+
+// CHECK-LABEL: @_Z15test_malloc_ptrv(
+int **test_malloc_ptr() {
+ // FIXME: This should not be token ID 0!
+ // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 8, i64 0)
+ int **a = (int **)malloc(sizeof(int*));
+ *a = nullptr;
+ return a;
+}
+
+// CHECK-LABEL: @_Z12test_new_intv(
+int *test_new_int() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 4, i64 0){{.*}} !alloc_token_hint
+ return new int;
+}
+
+// CHECK-LABEL: @_Z20test_new_ulong_arrayv(
+unsigned long *test_new_ulong_array() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 80, i64 0){{.*}} !alloc_token_hint
+ return new unsigned long[10];
+}
+
+// CHECK-LABEL: @_Z12test_new_ptrv(
+int **test_new_ptr() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 8, i64 1){{.*}} !alloc_token_hint
+ return new int*;
+}
+
+// CHECK-LABEL: @_Z18test_new_ptr_arrayv(
+int **test_new_ptr_array() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 80, i64 1){{.*}} !alloc_token_hint
+ return new int*[10];
+}
+
+struct ContainsPtr {
+ int a;
+ char *buf;
+};
+
+// CHECK-LABEL: @_Z27test_malloc_struct_with_ptrv(
+ContainsPtr *test_malloc_struct_with_ptr() {
+ // FIXME: This should not be token ID 0!
+ // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 16, i64 0)
+ ContainsPtr *c = (ContainsPtr *)malloc(sizeof(ContainsPtr));
+ return c;
+}
+
+// CHECK-LABEL: @_Z33test_malloc_struct_array_with_ptrv(
+ContainsPtr *test_malloc_struct_array_with_ptr() {
+ // FIXME: This should not be token ID 0!
+ // CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 160, i64 0)
+ ContainsPtr *c = (ContainsPtr *)malloc(10 * sizeof(ContainsPtr));
+ return c;
+}
+
+// CHECK-LABEL: @_Z32test_operatornew_struct_with_ptrv(
+ContainsPtr *test_operatornew_struct_with_ptr() {
+ // FIXME: This should not be token ID 0!
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 0)
+ ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(ContainsPtr));
+ return c;
+}
+
+// CHECK-LABEL: @_Z38test_operatornew_struct_array_with_ptrv(
+ContainsPtr *test_operatornew_struct_array_with_ptr() {
+ // FIXME: This should not be token ID 0!
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 160, i64 0)
+ ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(ContainsPtr));
+ return c;
+}
+
+// CHECK-LABEL: @_Z33test_operatornew_struct_with_ptr2v(
+ContainsPtr *test_operatornew_struct_with_ptr2() {
+ // FIXME: This should not be token ID 0!
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 0)
+ ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(*c));
+ return c;
+}
+
+// CHECK-LABEL: @_Z39test_operatornew_struct_array_with_ptr2v(
+ContainsPtr *test_operatornew_struct_array_with_ptr2() {
+ // FIXME: This should not be token ID 0!
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 160, i64 0)
+ ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(*c));
+ return c;
+}
+
+// CHECK-LABEL: @_Z24test_new_struct_with_ptrv(
+ContainsPtr *test_new_struct_with_ptr() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 1){{.*}} !alloc_token_hint
+ return new ContainsPtr;
+}
+
+// CHECK-LABEL: @_Z30test_new_struct_array_with_ptrv(
+ContainsPtr *test_new_struct_array_with_ptr() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 160, i64 1){{.*}} !alloc_token_hint
+ return new ContainsPtr[10];
+}
+
+class TestClass {
+public:
+ void Foo();
+ ~TestClass();
+ int data[16];
+};
+
+// CHECK-LABEL: @_Z14test_new_classv(
+TestClass *test_new_class() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 64, i64 0){{.*}} !alloc_token_hint
+ return new TestClass();
+}
+
+// CHECK-LABEL: @_Z20test_new_class_arrayv(
+TestClass *test_new_class_array() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 648, i64 0){{.*}} !alloc_token_hint
+ return new TestClass[10];
+}
+
+// Test that we detect that virtual classes have implicit vtable pointer.
+class VirtualTestClass {
+public:
+ virtual void Foo();
+ virtual ~VirtualTestClass();
+ int data[16];
+};
+
+// CHECK-LABEL: @_Z22test_new_virtual_classv(
+VirtualTestClass *test_new_virtual_class() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 72, i64 1){{.*}} !alloc_token_hint
+ return new VirtualTestClass();
+}
+
+// CHECK-LABEL: @_Z28test_new_virtual_class_arrayv(
+VirtualTestClass *test_new_virtual_class_array() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 728, i64 1){{.*}} !alloc_token_hint
+ return new VirtualTestClass[10];
+}
+
+// uintptr_t is treated as a pointer.
+struct MyStructUintptr {
+ int a;
+ uintptr_t ptr;
+};
+
+// CHECK-LABEL: @_Z18test_uintptr_isptrv(
+MyStructUintptr *test_uintptr_isptr() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 1)
+ return new MyStructUintptr;
+}
+
+using uptr = uintptr_t;
+// CHECK-LABEL: @_Z19test_uintptr_isptr2v(
+uptr *test_uintptr_isptr2() {
+ // CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 8, i64 1)
+ return new uptr;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 4ea68470ca684..74cda227d50a7 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -70,6 +70,11 @@ enum class TokenMode : unsigned {
/// Token ID based on allocated type hash.
TypeHash = 2,
+ /// Token ID based on allocated type hash, where the top half ID-space is
+ /// reserved for types that contain pointers and the bottom half for types
+ /// that do not contain pointers.
+ TypeHashPointerSplit = 3,
+
// Mode count - keep last
ModeCount
};
@@ -89,7 +94,7 @@ struct ModeParser : public cl::parser<unsigned> {
cl::opt<unsigned, false, ModeParser>
ClMode("alloc-token-mode", cl::desc("Token assignment mode"), cl::Hidden,
- cl::init(static_cast<unsigned>(TokenMode::TypeHash)));
+ cl::init(static_cast<unsigned>(TokenMode::TypeHashPointerSplit)));
cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
cl::desc("The allocation function prefix"),
@@ -142,16 +147,23 @@ STATISTIC(NumAllocations, "Allocations found");
/// Returns the !alloc_token_hint metadata if available.
///
-/// Expected format is: !{<type-name>}
+/// Expected format is: !{<type-name>, <contains-pointer>}
MDNode *getAllocTokenHintMetadata(const CallBase &CB) {
MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token_hint);
if (!Ret)
return nullptr;
- assert(Ret->getNumOperands() == 1 && "bad !alloc_token_hint");
+ assert(Ret->getNumOperands() == 2 && "bad !alloc_token_hint");
assert(isa<MDString>(Ret->getOperand(0)));
+ assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
return Ret;
}
+bool containsPointer(const MDNode *MD) {
+ ConstantAsMetadata *C = cast<ConstantAsMetadata>(MD->getOperand(1));
+ auto *CI = cast<ConstantInt>(C->getValue());
+ return CI->getValue().getBoolValue();
+}
+
class ModeBase {
public:
explicit ModeBase(uint64_t MaxTokens) : MaxTokens(MaxTokens) {}
@@ -198,12 +210,20 @@ class TypeHashMode : public ModeBase {
using ModeBase::ModeBase;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
+ const auto [N, H] = getHash(CB, ORE);
+ return N ? boundedToken(H) : H;
+ }
+
+protected:
+ std::pair<MDNode *, uint64_t> getHash(const CallBase &CB,
+ OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenHintMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
- return boundedToken(xxHash64(S->getString()));
+ return {N, xxHash64(S->getString())};
}
+ // Fallback.
remarkNoHint(CB, ORE);
- return ClFallbackToken;
+ return {nullptr, ClFallbackToken};
}
/// Remark that there was no precise type information.
@@ -219,6 +239,26 @@ class TypeHashMode : public ModeBase {
}
};
+/// Implementation for TokenMode::TypeHashPointerSplit.
+class TypeHashPointerSplitMode : public TypeHashMode {
+public:
+ using TypeHashMode::TypeHashMode;
+
+ uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
+ if (MaxTokens == 1)
+ return 0;
+ const uint64_t HalfTokens =
+ (MaxTokens ? MaxTokens : std::numeric_limits<uint64_t>::max()) / 2;
+ const auto [N, H] = getHash(CB, ORE);
+ if (!N)
+ return H; // fallback token
+ uint64_t Hash = H % HalfTokens; // base hash
+ if (containsPointer(N))
+ Hash += HalfTokens;
+ return Hash;
+ }
+};
+
// Apply opt overrides.
AllocTokenOptions &&transformOptionsFromCl(AllocTokenOptions &&Opts) {
if (!Opts.MaxTokens.has_value())
@@ -244,6 +284,9 @@ class AllocToken {
case TokenMode::TypeHash:
Mode.emplace<TypeHashMode>(*Options.MaxTokens);
break;
+ case TokenMode::TypeHashPointerSplit:
+ Mode.emplace<TypeHashPointerSplitMode>(*Options.MaxTokens);
+ break;
case TokenMode::ModeCount:
llvm_unreachable("");
break;
@@ -281,7 +324,9 @@ class AllocToken {
// Cache for replacement functions.
DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions;
// Selected mode.
- std::variant<IncrementMode, RandomMode, TypeHashMode> Mode;
+ std::variant<IncrementMode, RandomMode, TypeHashMode,
+ TypeHashPointerSplitMode>
+ Mode;
};
bool AllocToken::instrumentFunction(Function &F) {
diff --git a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
index 37c6d6463ffd2..94fc12b20caff 100644
--- a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
+++ b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
@@ -29,4 +29,4 @@ entry:
ret ptr %ptr1
}
-!0 = !{!"int"}
+!0 = !{!"int", i1 0}
diff --git a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
index be99e5ef14b16..b933c0bd352e9 100644
--- a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
+++ b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
@@ -60,4 +60,4 @@ entry:
ret ptr %ptr1
}
-!0 = !{!"int"}
+!0 = !{!"int", i1 0}
diff --git a/llvm/test/Instrumentation/AllocToken/remark.ll b/llvm/test/Instrumentation/AllocToken/remark.ll
index d5ecfc41bace5..d2bc273060403 100644
--- a/llvm/test/Instrumentation/AllocToken/remark.ll
+++ b/llvm/test/Instrumentation/AllocToken/remark.ll
@@ -24,4 +24,4 @@ entry:
ret ptr %ptr1
}
-!0 = !{!"int"}
+!0 = !{!"int", i1 0}
``````````
</details>
https://github.com/llvm/llvm-project/pull/156840
More information about the llvm-branch-commits
mailing list