[llvm-branch-commits] [clang] [ExposeDirectMethod] Nil check thunk generation (PR #170618)
Peter Rong via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 3 23:02:05 PST 2025
https://github.com/DataCorrupted created https://github.com/llvm/llvm-project/pull/170618
- Generation
- Dispatch
>From 81b0d800623f03b0c3fa9b4412e36dc585236ed8 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong at meta.com>
Date: Wed, 3 Dec 2025 22:42:51 -0800
Subject: [PATCH] [ExposeDirectMethod] Nil check thunk generation
- Generation
- Dispatch
---
clang/lib/CodeGen/CGDecl.cpp | 4 +-
clang/lib/CodeGen/CGObjC.cpp | 17 +-
clang/lib/CodeGen/CGObjCMac.cpp | 243 +++++++++++++++++++++++++++-
clang/lib/CodeGen/CodeGenFunction.h | 7 +
4 files changed, 263 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 8b1cd83af2396..9f0e09eac8866 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -2757,7 +2757,9 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr);
LValue lv = MakeAddrLValue(DeclPtr, Ty);
- if (IsScalar) {
+ // If this is a thunk, don't bother with ARC lifetime management.
+ // The true implementation will take care of that.
+ if (IsScalar && !CurFuncIsThunk) {
Qualifiers qs = Ty.getQualifiers();
if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
// We honor __attribute__((ns_consumed)) for types with lifetime.
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 10aad2e26938d..f1b8627fc119a 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -761,7 +761,18 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
const CGFunctionInfo &FI = CGM.getTypes().arrangeObjCMethodDeclaration(OMD);
if (OMD->isDirectMethod()) {
- Fn->setVisibility(llvm::Function::HiddenVisibility);
+ Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ if (CGM.shouldExposeSymbol(OMD)) {
+ // Find the decl that may have visibility set (property or method)
+ const NamedDecl *Decl = OMD;
+ if (const auto *PD = OMD->findPropertyDecl()) {
+ Decl = PD;
+ }
+ // and respect source level visibility setting
+ if (auto V = Decl->getExplicitVisibility(NamedDecl::VisibilityForValue)) {
+ Fn->setVisibility(CGM.GetLLVMVisibility(*V));
+ }
+ }
CGM.SetLLVMFunctionAttributes(OMD, FI, Fn, /*IsThunk=*/false);
CGM.SetLLVMFunctionAttributesForDefinition(OMD, Fn);
} else {
@@ -781,10 +792,6 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
OMD->getLocation(), StartLoc);
if (OMD->isDirectMethod()) {
- // This function is a direct call, it has to implement a nil check
- // on entry.
- //
- // TODO: possibly have several entry points to elide the check
CGM.getObjCRuntime().GenerateDirectMethodPrologue(*this, Fn, OMD, CD);
}
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index 3f4b11c634ce4..741e5d85b5935 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -1066,6 +1066,15 @@ class CGObjCCommonMac : public CodeGen::CGObjCRuntime {
DirectMethodInfo &GenerateDirectMethod(const ObjCMethodDecl *OMD,
const ObjCContainerDecl *CD);
+ llvm::Function *GenerateObjCDirectThunk(const ObjCMethodDecl *OMD,
+ const ObjCContainerDecl *CD,
+ llvm::Function *Implementation);
+
+ llvm::Function *GetDirectMethodCallee(const ObjCMethodDecl *OMD,
+ const ObjCContainerDecl *CD,
+ bool ReceiverCanBeNull,
+ bool ClassObjectCanBeUnrealized);
+
/// Generate class realization code: [self self]
/// This is used for class methods to ensure the class is initialized.
/// Returns the realized class object.
@@ -2094,6 +2103,9 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend(
bool ReceiverCanBeNull =
canMessageReceiverBeNull(CGF, Method, IsSuper, ClassReceiver, Arg0);
+ bool ClassObjectCanBeUnrealized =
+ Method && Method->isClassMethod() &&
+ canClassObjectBeUnrealized(ClassReceiver, CGF);
bool RequiresNullCheck = false;
bool RequiresSelValue = true;
@@ -2101,7 +2113,11 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend(
llvm::FunctionCallee Fn = nullptr;
if (Method && Method->isDirectMethod()) {
assert(!IsSuper);
- Fn = GenerateDirectMethod(Method, Method->getClassInterface());
+ // Use GetDirectMethodCallee to decide whether to use implementation or
+ // thunk.
+ Fn = GetDirectMethodCallee(Method, Method->getClassInterface(),
+ ReceiverCanBeNull, ClassObjectCanBeUnrealized);
+
// Direct methods will synthesize the proper `_cmd` internally,
// so just don't bother with setting the `_cmd` argument.
RequiresSelValue = false;
@@ -2138,6 +2154,23 @@ CodeGen::RValue CGObjCCommonMac::EmitMessageSend(
if (!RequiresNullCheck && Method && Method->hasParamDestroyedInCallee())
RequiresNullCheck = true;
+ if (CGM.shouldHaveNilCheckInline(Method)) {
+ // For variadic methods, we need to inline the precondition checks. That
+ // includes two things:
+ // 1. if this is a class method, we have to realize the class if we are not
+ // sure it is already realized.
+ if (ClassReceiver && ClassObjectCanBeUnrealized) {
+ // Perform class realization using the helper function
+ Arg0 = GenerateClassRealization(CGF, Arg0, ClassReceiver);
+ ActualArgs[0] = CallArg(RValue::get(Arg0), ActualArgs[0].Ty);
+ }
+ // 2. inline the nil check if the receiver may be null.
+ // Luckily, `NullReturnState` already does that for corner cases like
+ // ns_consumed, so we only need to override the flag, even if the return
+ // value is unused.
+ RequiresNullCheck |= ReceiverCanBeNull;
+ }
+
NullReturnState nullReturn;
if (RequiresNullCheck) {
nullReturn.init(CGF, Arg0);
@@ -3912,6 +3945,8 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD,
llvm::FunctionType *MethodTy =
Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD));
+ bool ExposeSymbol = CGM.shouldExposeSymbol(OMD);
+
if (OldFn) {
Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
"", &CGM.getModule());
@@ -3921,10 +3956,30 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD,
// Replace the cached implementation in the map.
I->second.Implementation = Fn;
+ llvm::Function *OldThunk = I->second.Thunk;
+ // If the implementation was replaced and an old thunk exists, invalidate
+ // the old thunk.
+ //
+ // TODO: ideally, a new thunk shouldn't be necessary: if the differing return
+ // types are just subclasses, at the IR level they are all pointers, i.e. the
+ // NewThunk and the OldThunk are identical.
+ if (OldThunk) {
+ llvm::Function *NewThunk = GenerateObjCDirectThunk(OMD, CD, Fn);
+
+ // Replace all uses before erasing
+ NewThunk->takeName(OldThunk);
+ OldThunk->replaceAllUsesWith(NewThunk);
+ OldThunk->eraseFromParent();
+
+ I->second.Thunk = NewThunk;
+ }
} else {
- auto Name = getSymbolNameForMethod(OMD, /*include category*/ false);
+ // Generate symbol without \01 prefix when optimization enabled
+ auto Name = getSymbolNameForMethod(OMD, /*include category*/ false,
+ /*includePrefixByte*/ !ExposeSymbol);
+ // ALWAYS use ExternalLinkage for true implementation
Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
Name, &CGM.getModule());
auto [It, inserted] = DirectMethodDefinitions.insert(std::make_pair(COMD, DirectMethodInfo(Fn)));
@@ -3936,6 +3991,190 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD,
return I->second;
}
+/// Start an Objective-C direct method thunk.
+///
+/// The thunk must use musttail to remain transparent to ARC - any
+/// ARC operations must happen in the caller, not in the thunk.
+///
+/// \param OMD the direct method whose self and parameter decls form the
+/// thunk's argument list.
+/// \param Fn the LLVM function that receives the thunk body.
+/// \param FI the function info passed through to StartFunction.
+void CodeGenFunction::StartObjCDirectThunk(const ObjCMethodDecl *OMD,
+ llvm::Function *Fn,
+ const CGFunctionInfo &FI)
+ {
+ // Mark this as a thunk function to disable ARC parameter processing
+ // and other thunk-inappropriate behavior.
+ CurFuncIsThunk = true;
+
+ // Build argument list for StartFunction.
+ // We must include all parameters to match the thunk's LLVM function type.
+ // The thunk uses musttail to forward all arguments directly, so no ARC
+ // processing is wanted here: EmitParmDecl skips ARC lifetime management
+ // when CurFuncIsThunk is set, and the parameters are forwarded as-is.
+ FunctionArgList FunctionArgs;
+ FunctionArgs.push_back(OMD->getSelfDecl());
+ FunctionArgs.append(OMD->param_begin(), OMD->param_end());
+
+ // The Start/Finish thunk pattern is borrowed from CGVTables.cpp
+ // for C++ virtual method thunks, but adapted for ObjC direct methods.
+ //
+ // Like C++ thunks, we don't have an actual AST body for the thunk - we only
+ // have the method's parameter declarations. Therefore, we pass an empty
+ // `GlobalDecl` to `StartFunction` ...
+ StartFunction(GlobalDecl(), OMD->getReturnType(), Fn, FI, FunctionArgs,
+ OMD->getLocation(), OMD->getLocation());
+
+ // ... and manually set the decls afterwards so other utilities / helpers in
+ // CGF can still access the AST (e.g. to arrange function arguments).
+ CurCodeDecl = OMD;
+ CurFuncDecl = OMD;
+}
+
+/// Finish an Objective-C direct method thunk.
+void CodeGenFunction::FinishObjCDirectThunk() {
+ // Create a dummy block to return the value of the thunk.
+ //
+ // The non-nil branch has already returned because of musttail.
+ // Only the nil branch will jump to this return block.
+ // If the nil check is not emitted (for class methods), this will be a dead
+ // block.
+ //
+ // Either way, the LLVM optimizer will simplify it later. This is just to
+ // keep the CFG well-formed.
+ EmitBlock(createBasicBlock("dummy_ret_block"));
+
+ // Disable the final ARC autorelease.
+ // Thunk functions tail-call the actual implementation, so they don't need
+ // to worry about ARC.
+ AutoreleaseResult = false;
+
+ // Clear these to restore the invariants expected by
+ // StartFunction/FinishFunction.
+ CurCodeDecl = nullptr;
+ CurFuncDecl = nullptr;
+
+ FinishFunction();
+}
+
+/// Build the nil-check thunk for a direct method.
+///
+/// Creates a hidden linkonce_odr function named "<Implementation name>_thunk"
+/// with the implementation's function type, emits the precondition checks via
+/// GenerateDirectMethodsPreconditionCheck, then forwards every argument to
+/// \p Implementation with a musttail call and returns its result.
+llvm::Function *
+CGObjCCommonMac::GenerateObjCDirectThunk(const ObjCMethodDecl *OMD,
+ const ObjCContainerDecl *CD,
+ llvm::Function *Implementation) {
+
+ assert(CGM.shouldHaveNilCheckThunk(OMD) &&
+ "Should only generate thunk when optimization enabled");
+ assert(Implementation && "Implementation must exist");
+
+ llvm::FunctionType *ThunkTy = Implementation->getFunctionType();
+ std::string ThunkName = Implementation->getName().str() + "_thunk";
+
+ // Create thunk with linkonce_odr linkage (allows deduplication)
+ llvm::Function *Thunk =
+ llvm::Function::Create(ThunkTy, llvm::GlobalValue::LinkOnceODRLinkage,
+ ThunkName, &CGM.getModule());
+
+ // Thunks should always have hidden visibility; other link units will have
+ // their own version of the (identical) thunk. If they make a cross link-unit
+ // call, they are either calling through their thunk or directly dispatching
+ // to the true implementation, so making the thunk visible is meaningless.
+ Thunk->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ Thunk->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ // Arrange the function info used to start the thunk and, further down, to
+ // build the call-site attributes.
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeObjCMethodDeclaration(OMD);
+
+ // Create a CodeGenFunction to generate the thunk body; starting the thunk
+ // sets up its state and calls StartFunction.
+ CodeGenFunction CGF(CGM);
+ CGF.StartObjCDirectThunk(OMD, Thunk, FI);
+
+ // Copy the attribute list from the implementation to make musttail happy
+ llvm::AttributeList ImplAttrs = Implementation->getAttributes();
+ Thunk->setAttributes(ImplAttrs);
+
+ // Emit the precondition checks:
+ // - [self self] for class methods (class realization)
+ // - if (self == nil) branch to nil block with zero return
+ // - continuation block for non-nil case
+ GenerateDirectMethodsPreconditionCheck(CGF, Thunk, OMD, CD);
+
+ // Now emit the musttail call to the true implementation
+ // Collect all arguments for forwarding
+ SmallVector<llvm::Value *, 8> Args;
+ for (auto &Arg : Thunk->args())
+ Args.push_back(&Arg);
+
+ // Create musttail call to the implementation
+ llvm::CallInst *Call = CGF.Builder.CreateCall(Implementation, Args);
+ Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
+
+ // Apply call-site attributes using ConstructAttributeList
+ // When sret is used, the call must have matching sret attributes on the first
+ // parameter for musttail to work correctly. This mirrors what C++ thunks do
+ // in EmitMustTailThunk.
+ unsigned CallingConv;
+ llvm::AttributeList Attrs;
+ CGM.ConstructAttributeList(Implementation->getName(), FI, GlobalDecl(OMD),
+ Attrs, CallingConv, /*AttrOnCallSite=*/true,
+ /*IsThunk=*/false);
+ Call->setAttributes(Attrs);
+ Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
+
+ // Immediately return the call result (musttail requirement)
+ if (FI.getReturnInfo().isIndirect()) {
+ // SRet case: return void
+ CGF.Builder.CreateRetVoid();
+ } else {
+ if (ThunkTy->getReturnType()->isVoidTy())
+ CGF.Builder.CreateRetVoid();
+ else
+ CGF.Builder.CreateRet(Call);
+ }
+
+ // Finish the ObjC direct thunk (creates dummy block and calls FinishFunction)
+ CGF.FinishObjCDirectThunk();
+ return Thunk;
+}
+
+/// Pick the function a call site of a direct method should call.
+///
+/// Returns the true implementation when the symbol is not exposed, when the
+/// nil check is inlined at the call site, or when the receiver is known to be
+/// non-null (and, for class methods, the class object is known to be
+/// realized). Otherwise returns the lazily created nil-check thunk.
+llvm::Function *CGObjCCommonMac::GetDirectMethodCallee(
+ const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD,
+ bool ReceiverCanBeNull, bool ClassObjectCanBeUnrealized) {
+
+ // Get from cache or populate the function declaration lazily
+ DirectMethodInfo &Info = GenerateDirectMethod(OMD, CD);
+
+ // If the optimization is not enabled, always use the implementation (which
+ // includes the nil check)
+ if (!CGM.shouldExposeSymbol(OMD)) {
+ return Info.Implementation;
+ }
+
+ // Variadic methods don't have a thunk; the caller needs to inline the nil check
+ if (CGM.shouldHaveNilCheckInline(OMD)) {
+ return Info.Implementation;
+ }
+
+ // The thunk is generated lazily and cached in Info.
+ auto getOrCreateThunk = [&]() {
+ if (!Info.Thunk)
+ Info.Thunk = GenerateObjCDirectThunk(OMD, CD, Info.Implementation);
+ return Info.Thunk;
+ };
+
+ assert(CGM.shouldHaveNilCheckThunk(OMD) &&
+ "a method either has nil check thunk or have thunk inlined when "
+ "exposing its symbol");
+
+ if (OMD->isInstanceMethod()) {
+ // If we can prove the instance method's receiver is not null, return the
+ // true implementation
+ return ReceiverCanBeNull ? getOrCreateThunk() : Info.Implementation;
+ }
+ if (OMD->isClassMethod()) {
+ // For class methods, the receiver needs to be non-null and realized before
+ // we dispatch to the true implementation
+ return (ReceiverCanBeNull || ClassObjectCanBeUnrealized)
+ ? getOrCreateThunk()
+ : Info.Implementation;
+ }
+ llvm_unreachable("OMD should either be a class method or instance method");
+}
+
llvm::Value *
CGObjCCommonMac::GenerateClassRealization(CodeGenFunction &CGF,
llvm::Value *classObject,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 8c4c1c8c2dc95..f507146b37cc5 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2422,6 +2422,13 @@ class CodeGenFunction : public CodeGenTypeCache {
void FinishThunk();
+ /// Start an Objective-C direct method thunk.
+ void StartObjCDirectThunk(const ObjCMethodDecl *OMD, llvm::Function *Fn,
+ const CGFunctionInfo &FI);
+
+ /// Finish an Objective-C direct method thunk.
+ void FinishObjCDirectThunk();
+
/// Emit a musttail call for a thunk with a potentially adjusted this pointer.
void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr,
llvm::FunctionCallee Callee);
More information about the llvm-branch-commits
mailing list