[llvm] 8dbc152 - [AllocToken] Introduce llvm.alloc.token.id intrinsic (#163632)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Tue Oct 21 09:50:49 PDT 2025
    
    
  
Author: Marco Elver
Date: 2025-10-21T18:50:44+02:00
New Revision: 8dbc1527f7ad4197dfff8ea598634a8063bb6083
URL: https://github.com/llvm/llvm-project/commit/8dbc1527f7ad4197dfff8ea598634a8063bb6083
DIFF: https://github.com/llvm/llvm-project/commit/8dbc1527f7ad4197dfff8ea598634a8063bb6083.diff
LOG: [AllocToken] Introduce llvm.alloc.token.id intrinsic (#163632)
Introduce a new intrinsic, `llvm.alloc.token.id`, to allow compile-time
querying of allocation token IDs.
The `AllocToken` pass is taught to recognize and lower this intrinsic.
It extracts the `!alloc_token` metadata from the intrinsic's argument,
feeds it into the same token-generation logic used for instrumenting allocation
calls, and replaces the intrinsic with the resulting constant integer token ID.
This is a prerequisite for `__builtin_infer_alloc_token`. The pass now
runs on all functions to ensure intrinsics are lowered, but continues to
only instrument allocation calls in functions with the
`sanitize_alloc_token` attribute.
Added: 
    llvm/test/Instrumentation/AllocToken/intrinsic.ll
    llvm/test/Instrumentation/AllocToken/intrinsic32.ll
Modified: 
    llvm/include/llvm/IR/Intrinsics.td
    llvm/lib/Transforms/Instrumentation/AllocToken.cpp
Removed: 
    
################################################################################
diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 12d1c2528f977..e6cce9a4eea1d 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2851,7 +2851,15 @@ def int_ptrauth_blend :
 def int_ptrauth_sign_generic :
   DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
 
+//===----------------- AllocToken Intrinsics ------------------------------===//
+
+// Return the token ID for the given !alloc_token metadata.
+def int_alloc_token_id :
+  DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
+                        [IntrNoMem, NoUndef<RetIndex>]>;
+
 //===----------------------------------------------------------------------===//
+
 //===------- Convergence Intrinsics ---------------------------------------===//
 
 def int_experimental_convergence_entry
diff  --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 40720ae4b39ae..29968b834cfce 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -31,6 +31,7 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
@@ -131,7 +132,7 @@ cl::opt<uint64_t> ClFallbackToken(
 
 //===--- Statistics -------------------------------------------------------===//
 
-STATISTIC(NumFunctionsInstrumented, "Functions instrumented");
+STATISTIC(NumFunctionsModified, "Functions modified");
 STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
 
 //===----------------------------------------------------------------------===//
@@ -140,9 +141,19 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
 ///
 /// Expected format is: !{<type-name>, <contains-pointer>}
 MDNode *getAllocTokenMetadata(const CallBase &CB) {
-  MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
-  if (!Ret)
-    return nullptr;
+  MDNode *Ret = nullptr;
+  if (auto *II = dyn_cast<IntrinsicInst>(&CB);
+      II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+    auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0));
+    Ret = cast<MDNode>(MDV->getMetadata());
+    // If the intrinsic has an empty MDNode, type inference failed.
+    if (Ret->getNumOperands() == 0)
+      return nullptr;
+  } else {
+    Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
+    if (!Ret)
+      return nullptr;
+  }
   assert(Ret->getNumOperands() == 2 && "bad !alloc_token");
   assert(isa<MDString>(Ret->getOperand(0)));
   assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
@@ -315,6 +326,9 @@ class AllocToken {
   FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
                                        LibFunc OriginalFunc);
 
+  /// Lower alloc_token_* intrinsics.
+  void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE);
+
   /// Return the token ID from metadata in the call.
   uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
     return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode);
@@ -336,21 +350,32 @@ bool AllocToken::instrumentFunction(Function &F) {
   // Do not apply any instrumentation for naked functions.
   if (F.hasFnAttribute(Attribute::Naked))
     return false;
-  if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
-    return false;
   // Don't touch available_externally functions, their actual body is elsewhere.
   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
     return false;
-  // Only instrument functions that have the sanitize_alloc_token attribute.
-  if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
-    return false;
 
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
   SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
+  SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
+
+  // Only instrument functions that have the sanitize_alloc_token attribute.
+  const bool InstrumentFunction =
+      F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
+      !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
 
   // Collect all allocation calls to avoid iterator invalidation.
   for (Instruction &I : instructions(F)) {
+    // Collect all alloc_token_* intrinsics.
+    if (auto *II = dyn_cast<IntrinsicInst>(&I);
+        II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+      IntrinsicInsts.emplace_back(II);
+      continue;
+    }
+
+    if (!InstrumentFunction)
+      continue;
+
     auto *CB = dyn_cast<CallBase>(&I);
     if (!CB)
       continue;
@@ -359,11 +384,21 @@ bool AllocToken::instrumentFunction(Function &F) {
   }
 
   bool Modified = false;
-  for (auto &[CB, Func] : AllocCalls)
-    Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
 
-  if (Modified)
-    NumFunctionsInstrumented++;
+  if (!AllocCalls.empty()) {
+    for (auto &[CB, Func] : AllocCalls)
+      Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
+    if (Modified)
+      NumFunctionsModified++;
+  }
+
+  if (!IntrinsicInsts.empty()) {
+    for (auto *II : IntrinsicInsts)
+      replaceIntrinsicInst(II, ORE);
+    Modified = true;
+    NumFunctionsModified++;
+  }
+
   return Modified;
 }
 
@@ -381,7 +416,7 @@ AllocToken::shouldInstrumentCall(const CallBase &CB,
   if (TLI.getLibFunc(*Callee, Func)) {
     if (isInstrumentableLibFunc(Func, CB, TLI))
       return Func;
-  } else if (Options.Extended && getAllocTokenMetadata(CB)) {
+  } else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) {
     return NotLibFunc;
   }
 
@@ -528,6 +563,16 @@ FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB,
   return TokenAlloc;
 }
 
+void AllocToken::replaceIntrinsicInst(IntrinsicInst *II,
+                                      OptimizationRemarkEmitter &ORE) {
+  assert(II->getIntrinsicID() == Intrinsic::alloc_token_id);
+
+  uint64_t TokenID = getToken(*II, ORE);
+  Value *V = ConstantInt::get(IntPtrTy, TokenID);
+  II->replaceAllUsesWith(V);
+  II->eraseFromParent();
+}
+
 } // namespace
 
 AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts)
diff  --git a/llvm/test/Instrumentation/AllocToken/intrinsic.ll b/llvm/test/Instrumentation/AllocToken/intrinsic.ll
new file mode 100644
index 0000000000000..13aaa90008a7c
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/intrinsic.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; Test that the alloc-token pass lowers the intrinsic to a constant token ID.
+;
+; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i64 @llvm.alloc.token.id.i64(metadata)
+
+define i64 @test_intrinsic_lowering() {
+; CHECK-LABEL: define i64 @test_intrinsic_lowering() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %token_no_ptr = call i64 @llvm.alloc.token.id.i64(metadata !0)
+  ret i64 %token_no_ptr
+}
+
+define i64 @test_intrinsic_lowering_ptr() {
+; CHECK-LABEL: define i64 @test_intrinsic_lowering_ptr() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i64 1
+;
+entry:
+  %token_with_ptr = call i64 @llvm.alloc.token.id.i64(metadata !1)
+  ret i64 %token_with_ptr
+}
+
+!0 = !{!"NoPointerType", i1 false}
+!1 = !{!"PointerType", i1 true}
diff  --git a/llvm/test/Instrumentation/AllocToken/intrinsic32.ll b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll
new file mode 100644
index 0000000000000..eb5dbbe91a83e
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/intrinsic32.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; Test that the alloc-token pass lowers the intrinsic to a constant token ID.
+;
+; RUN: opt < %s -passes=alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+declare i32 @llvm.alloc.token.id.i32(metadata)
+
+define i32 @test_intrinsic_lowering() {
+; CHECK-LABEL: define i32 @test_intrinsic_lowering() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %token_no_ptr = call i32 @llvm.alloc.token.id.i32(metadata !0)
+  ret i32 %token_no_ptr
+}
+
+define i32 @test_intrinsic_lowering_ptr() {
+; CHECK-LABEL: define i32 @test_intrinsic_lowering_ptr() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  %token_with_ptr = call i32 @llvm.alloc.token.id.i32(metadata !1)
+  ret i32 %token_with_ptr
+}
+
+!0 = !{!"NoPointerType", i1 false}
+!1 = !{!"PointerType", i1 true}
        
    
    
More information about the llvm-commits
mailing list