[llvm-branch-commits] [clang] [Clang][CodeGen] Implement code generation for __builtin_infer_alloc_token() (PR #156842)
Marco Elver via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Oct 17 11:15:05 PDT 2025
https://github.com/melver updated https://github.com/llvm/llvm-project/pull/156842
>From 019499fd0f441ee14e71392f6d1b1219734163a3 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver at google.com>
Date: Wed, 15 Oct 2025 15:13:27 +0200
Subject: [PATCH] force rebase
---
clang/docs/AllocToken.rst | 43 ++++++++---
clang/docs/ReleaseNotes.rst | 3 +
clang/lib/CodeGen/BackendUtil.cpp | 28 ++++---
clang/lib/CodeGen/CGBuiltin.cpp | 9 +++
clang/test/CodeGen/lto-newpm-pipeline.c | 8 +-
clang/test/CodeGenCXX/alloc-token-builtin.cpp | 77 +++++++++++++++++++
6 files changed, 146 insertions(+), 22 deletions(-)
create mode 100644 clang/test/CodeGenCXX/alloc-token-builtin.cpp
diff --git a/clang/docs/AllocToken.rst b/clang/docs/AllocToken.rst
index b65e18ccfa967..1a740e5e22c29 100644
--- a/clang/docs/AllocToken.rst
+++ b/clang/docs/AllocToken.rst
@@ -49,6 +49,39 @@ change or removal. These may (experimentally) be selected with ``-Xclang
* ``increment``: This mode assigns a simple, incrementally increasing token ID
to each allocation site.
+The following command-line options affect generated token IDs:
+
+* ``-falloc-token-max=<N>``
+ Configures the maximum number of tokens. No max by default (tokens bounded
+ by ``SIZE_MAX``).
+
+Querying Token IDs with ``__builtin_infer_alloc_token``
+=======================================================
+
+For use cases where the token ID must be known at compile time, Clang provides
+a builtin function:
+
+.. code-block:: c
+
+ size_t __builtin_infer_alloc_token(<args>, ...);
+
+This builtin returns the token ID inferred from its argument expressions, which
+mirror arguments normally passed to any allocation function. The argument
+expressions are **unevaluated**, so it can be used with expressions that would
+have side effects without any runtime impact.
+
+For example, it can be used as follows:
+
+.. code-block:: c
+
+ struct MyType { ... };
+ void *__partition_alloc(size_t size, size_t partition);
+ #define partition_alloc(...) __partition_alloc(__VA_ARGS__, __builtin_infer_alloc_token(__VA_ARGS__))
+
+ void foo(void) {
+ MyType *x = partition_alloc(sizeof(*x));
+ }
+
Allocation Token Instrumentation
================================
@@ -70,16 +103,6 @@ example:
// Instrumented:
ptr = __alloc_token_malloc(size, <token id>);
-The following command-line options affect generated token IDs:
-
-* ``-falloc-token-max=<N>``
- Configures the maximum number of tokens. No max by default (tokens bounded
- by ``SIZE_MAX``).
-
- .. code-block:: console
-
- % clang++ -fsanitize=alloc-token -falloc-token-max=512 example.cc
-
Runtime Interface
-----------------
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index db2b0f6fd5027..5bd3a071ee33e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -268,6 +268,9 @@ Non-comprehensive list of changes in this release
allocator-level heap organization strategies. A feature to instrument all
allocation functions with a token ID can be enabled via the
``-fsanitize=alloc-token`` flag.
+- A builtin ``__builtin_infer_alloc_token(<args>, ...)`` is provided to allow
+ compile-time querying of allocation token IDs, where the builtin arguments
+ mirror those normally passed to an allocation function.
New Compiler Flags
------------------
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 23ad11ac9f792..2be61f2479e44 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -803,16 +803,6 @@ static void addSanitizers(const Triple &TargetTriple,
MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles,
PB.getVirtualFileSystemPtr()));
}
-
- if (LangOpts.Sanitize.has(SanitizerKind::AllocToken)) {
- if (Level == OptimizationLevel::O0) {
- // The default pass builder only infers libcall function attrs when
- // optimizing, so we insert it here because we need it for accurate
- // memory allocation function detection.
- MPM.addPass(InferFunctionAttrsPass());
- }
- MPM.addPass(AllocTokenPass(getAllocTokenOptions(LangOpts, CodeGenOpts)));
- }
};
if (ClSanitizeOnOptimizerEarlyEP) {
PB.registerOptimizerEarlyEPCallback(
@@ -855,6 +845,23 @@ static void addSanitizers(const Triple &TargetTriple,
}
}
+static void addAllocTokenPass(const Triple &TargetTriple,
+ const CodeGenOptions &CodeGenOpts,
+ const LangOptions &LangOpts, PassBuilder &PB) {
+ PB.registerOptimizerLastEPCallback([&](ModulePassManager &MPM,
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase) {
+ if (Level == OptimizationLevel::O0 &&
+ LangOpts.Sanitize.has(SanitizerKind::AllocToken)) {
+ // The default pass builder only infers libcall function attrs when
+ // optimizing, so we insert it here because we need it for accurate
+ // memory allocation function detection with -fsanitize=alloc-token.
+ MPM.addPass(InferFunctionAttrsPass());
+ }
+ MPM.addPass(AllocTokenPass(getAllocTokenOptions(LangOpts, CodeGenOpts)));
+ });
+}
+
void EmitAssemblyHelper::RunOptimizationPipeline(
BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS,
std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS, BackendConsumer *BC) {
@@ -1109,6 +1116,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!IsThinLTOPostLink) {
addSanitizers(TargetTriple, CodeGenOpts, LangOpts, PB);
addKCFIPass(TargetTriple, LangOpts, PB);
+ addAllocTokenPass(TargetTriple, CodeGenOpts, LangOpts, PB);
}
if (std::optional<GCOVOptions> Options =
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9ee810c9d5775..8a85789a256d5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4525,6 +4525,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(AI);
}
+ case Builtin::BI__builtin_infer_alloc_token: {
+ llvm::MDNode *MDN = buildAllocToken(E);
+ llvm::Value *MDV = MetadataAsValue::get(getLLVMContext(), MDN);
+ llvm::Function *F =
+ CGM.getIntrinsic(llvm::Intrinsic::alloc_token_id, {IntPtrTy});
+ llvm::CallBase *TokenID = Builder.CreateCall(F, MDV);
+ return RValue::get(TokenID);
+ }
+
case Builtin::BIbzero:
case Builtin::BI__builtin_bzero: {
Address Dest = EmitPointerWithAlignment(E->getArg(0));
diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c
index ea9784a76f923..dceaaf136ebfc 100644
--- a/clang/test/CodeGen/lto-newpm-pipeline.c
+++ b/clang/test/CodeGen/lto-newpm-pipeline.c
@@ -32,10 +32,12 @@
// CHECK-FULL-O0-NEXT: Running pass: AlwaysInlinerPass
// CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis
// CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper
+// CHECK-FULL-O0-NEXT: Running pass: AllocTokenPass
+// CHECK-FULL-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
// CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass
// CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass
// CHECK-FULL-O0-NEXT: Running pass: AnnotationRemarksPass
-// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
// CHECK-FULL-O0-NEXT: Running pass: VerifierPass
// CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass
@@ -46,10 +48,12 @@
// CHECK-THIN-O0-NEXT: Running pass: AlwaysInlinerPass
// CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis
// CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper
+// CHECK-THIN-O0-NEXT: Running pass: AllocTokenPass
+// CHECK-THIN-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
// CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass
// CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass
// CHECK-THIN-O0-NEXT: Running pass: AnnotationRemarksPass
-// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
// CHECK-THIN-O0-NEXT: Running pass: VerifierPass
// CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass
diff --git a/clang/test/CodeGenCXX/alloc-token-builtin.cpp b/clang/test/CodeGenCXX/alloc-token-builtin.cpp
new file mode 100644
index 0000000000000..2d0b8e1666faf
--- /dev/null
+++ b/clang/test/CodeGenCXX/alloc-token-builtin.cpp
@@ -0,0 +1,77 @@
+// To test IR generation of the builtin without evaluating the LLVM intrinsic,
+// we set the mode to a stateful mode, which prohibits constant evaluation.
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Werror -std=c++20 -emit-llvm -falloc-token-mode=random -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-CODEGEN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Werror -std=c++20 -emit-llvm -falloc-token-max=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LOWER
+
+extern "C" void *my_malloc(unsigned long, unsigned long);
+
+struct NoPtr {
+ int x;
+ long y;
+};
+
+struct WithPtr {
+ int a;
+ char *buf;
+};
+
+int unevaluated_fn();
+
+// CHECK-LABEL: @_Z16test_builtin_intv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_INT:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_int() {
+ return __builtin_infer_alloc_token(sizeof(1));
+}
+
+// CHECK-LABEL: @_Z16test_builtin_ptrv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_PTR:[0-9]+]])
+// CHECK-LOWER: ret i64 1
+unsigned long test_builtin_ptr() {
+ return __builtin_infer_alloc_token(sizeof(int *));
+}
+
+// CHECK-LABEL: @_Z25test_builtin_struct_noptrv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_NOPTR:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_struct_noptr() {
+ return __builtin_infer_alloc_token(sizeof(NoPtr));
+}
+
+// CHECK-LABEL: @_Z25test_builtin_struct_w_ptrv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_WITHPTR:[0-9]+]])
+// CHECK-LOWER: ret i64 1
+unsigned long test_builtin_struct_w_ptr() {
+ return __builtin_infer_alloc_token(sizeof(WithPtr), 123);
+}
+
+// CHECK-LABEL: @_Z24test_builtin_unevaluatedv(
+// CHECK-NOT: call{{.*}}unevaluated_fn
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_INT:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_unevaluated() {
+ return __builtin_infer_alloc_token(sizeof(int) * unevaluated_fn());
+}
+
+// CHECK-LABEL: @_Z36test_builtin_unsequenced_unevaluatedi(
+// CHECK: add nsw
+// CHECK-NOT: add nsw
+// CHECK-CODEGEN: %[[REG:[0-9]+]] = call i64 @llvm.alloc.token.id.i64(metadata ![[META_UNKNOWN:[0-9]+]])
+// CHECK-CODEGEN: call{{.*}}@my_malloc({{.*}}, i64 noundef %[[REG]])
+// CHECK-LOWER: call{{.*}}@my_malloc({{.*}}, i64 noundef 0)
+void test_builtin_unsequenced_unevaluated(int x) {
+ my_malloc(++x, __builtin_infer_alloc_token(++x));
+}
+
+// CHECK-LABEL: @_Z20test_builtin_unknownv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_UNKNOWN:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_unknown() {
+ return __builtin_infer_alloc_token(4096);
+}
+
+// CHECK-CODEGEN: ![[META_INT]] = !{!"int", i1 false}
+// CHECK-CODEGEN: ![[META_PTR]] = !{!"int *", i1 true}
+// CHECK-CODEGEN: ![[META_NOPTR]] = !{!"NoPtr", i1 false}
+// CHECK-CODEGEN: ![[META_WITHPTR]] = !{!"WithPtr", i1 true}
+// CHECK-CODEGEN: ![[META_UNKNOWN]] = !{}
More information about the llvm-branch-commits
mailing list