[clang] [Clang][CodeGen][Sema] Fix crash when compiling naked lambdas (PR #165524)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 30 03:28:18 PDT 2025
https://github.com/alcxpr updated https://github.com/llvm/llvm-project/pull/165524
>From f303f139966b6753718aa3e12a2b3e2dc4ef5a4c Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Wed, 29 Oct 2025 14:59:42 +0700
Subject: [PATCH 1/8] [Clang][CodeGen] Fix crash when compiling naked lambdas
Skip instance and lambda prologue emission when a lambda's call operator is
marked `naked`, preventing invalid access to `this` during code generation.
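For reference, a minimal reproducer along the lines of the test added below
(the function name is illustrative): before this change, emitting the lambda
prologue tried to materialize `this` for the naked call operator and crashed.

    void crash_repro() {
      auto l = []() __attribute__((naked)) {
        asm volatile("retq");  // naked body: nothing but hand-written asm
      };
      l();
    }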
---
clang/lib/CodeGen/CodeGenFunction.cpp | 82 +++++++++++++++-----------
clang/test/CodeGenCXX/naked-lambda.cpp | 20 +++++++
2 files changed, 66 insertions(+), 36 deletions(-)
create mode 100644 clang/test/CodeGenCXX/naked-lambda.cpp
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 88628530cf66b..11e9e708f9b73 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -46,6 +46,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CRC.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -1271,50 +1272,59 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
}
}
- EmitFunctionProlog(*CurFnInfo, CurFn, Args);
+ EmitFunctionProlog(*CurFnInfo, CurFn, Args);
if (const CXXMethodDecl *MD = dyn_cast_if_present<CXXMethodDecl>(D);
MD && !MD->isStatic()) {
bool IsInLambda =
MD->getParent()->isLambda() && MD->getOverloadedOperator() == OO_Call;
- if (MD->isImplicitObjectMemberFunction())
- CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
- if (IsInLambda) {
- // We're in a lambda; figure out the captures.
- MD->getParent()->getCaptureFields(LambdaCaptureFields,
- LambdaThisCaptureField);
- if (LambdaThisCaptureField) {
- // If the lambda captures the object referred to by '*this' - either by
- // value or by reference, make sure CXXThisValue points to the correct
- // object.
-
- // Get the lvalue for the field (which is a copy of the enclosing object
- // or contains the address of the enclosing object).
- LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField);
- if (!LambdaThisCaptureField->getType()->isPointerType()) {
- // If the enclosing object was captured by value, just use its
- // address. Sign this pointer.
- CXXThisValue = ThisFieldLValue.getPointer(*this);
- } else {
- // Load the lvalue pointed to by the field, since '*this' was captured
- // by reference.
- CXXThisValue =
- EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal();
+
+ const FunctionDecl *FD = dyn_cast_if_present<FunctionDecl>(D);
+ bool IsNaked = FD && FD->hasAttr<NakedAttr>();
+
+ if (!IsNaked) {
+ if (MD->isImplicitObjectMemberFunction())
+ CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
+
+ if (IsInLambda) {
+ // We're in a lambda; figure out the captures.
+ MD->getParent()->getCaptureFields(LambdaCaptureFields,
+ LambdaThisCaptureField);
+ if (LambdaThisCaptureField) {
+ // If the lambda captures the object referred to by '*this' - either by
+ // value or by reference, make sure CXXThisValue points to the correct
+ // object.
+
+ // Get the lvalue for the field (which is a copy of the enclosing object
+ // or contains the address of the enclosing object).
+ LValue ThisFieldLValue =
+ EmitLValueForLambdaField(LambdaThisCaptureField);
+ if (!LambdaThisCaptureField->getType()->isPointerType()) {
+ // If the enclosing object was captured by value, just use its
+ // address. Sign this pointer.
+ CXXThisValue = ThisFieldLValue.getPointer(*this);
+ } else {
+ // Load the lvalue pointed to by the field, since '*this' was captured
+ // by reference.
+ CXXThisValue =
+ EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal();
+ }
}
- }
- for (auto *FD : MD->getParent()->fields()) {
- if (FD->hasCapturedVLAType()) {
- auto *ExprArg = EmitLoadOfLValue(EmitLValueForLambdaField(FD),
- SourceLocation()).getScalarVal();
- auto VAT = FD->getCapturedVLAType();
- VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+
+ for (auto *FD : MD->getParent()->fields()) {
+ if (FD->hasCapturedVLAType()) {
+ auto *ExprArg = EmitLoadOfLValue(EmitLValueForLambdaField(FD),
+ SourceLocation()).getScalarVal();
+ auto VAT = FD->getCapturedVLAType();
+ VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+ }
}
+ } else if (MD->isImplicitObjectMemberFunction()) {
+ // Not in a lambda; just use 'this' from the method.
+ // FIXME: Should we generate a new load for each use of 'this'? The
+ // fast register allocator would be happier...
+ CXXThisValue = CXXABIThisValue;
}
- } else if (MD->isImplicitObjectMemberFunction()) {
- // Not in a lambda; just use 'this' from the method.
- // FIXME: Should we generate a new load for each use of 'this'? The
- // fast register allocator would be happier...
- CXXThisValue = CXXABIThisValue;
}
// Check the 'this' pointer once per function, if it's available.
diff --git a/clang/test/CodeGenCXX/naked-lambda.cpp b/clang/test/CodeGenCXX/naked-lambda.cpp
new file mode 100644
index 0000000000000..27ea1af09da01
--- /dev/null
+++ b/clang/test/CodeGenCXX/naked-lambda.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S %s -o - | FileCheck %s --check-prefix=ASM
+
+void test_naked_lambda() {
+ auto l = []() __attribute__((naked)) {
+ asm volatile("retq");
+ };
+ l();
+}
+
+// CHECK: define internal void @"_ZZ17test_naked_lambdavENK3$_0clEv"
+// CHECK-NOT: alloca
+// CHECK-NOT: store
+// CHECK-NOT: call void @_ZN
+// ASM-LABEL: _ZZ17test_naked_lambdavENK3$_0clEv:
+// ASM-NOT: push
+// ASM-NOT: pop
+// ASM: retq
+
+
>From 666aed6e0078e158e8539d842f3c8b88de74891f Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Wed, 29 Oct 2025 15:38:22 +0700
Subject: [PATCH 2/8] [Clang][CodeGen] Apply clang-format
---
clang/lib/CodeGen/CodeGenFunction.cpp | 180 ++++++++++++++------------
1 file changed, 94 insertions(+), 86 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 11e9e708f9b73..a4affa30f491a 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -115,9 +115,12 @@ llvm::fp::ExceptionBehavior
clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
switch (Kind) {
- case LangOptions::FPE_Ignore: return llvm::fp::ebIgnore;
- case LangOptions::FPE_MayTrap: return llvm::fp::ebMayTrap;
- case LangOptions::FPE_Strict: return llvm::fp::ebStrict;
+ case LangOptions::FPE_Ignore:
+ return llvm::fp::ebIgnore;
+ case LangOptions::FPE_MayTrap:
+ return llvm::fp::ebMayTrap;
+ case LangOptions::FPE_Strict:
+ return llvm::fp::ebStrict;
default:
llvm_unreachable("Unsupported FP Exception Behavior");
}
@@ -175,8 +178,7 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) {
"FPConstrained should be enabled on entire function");
auto mergeFnAttrValue = [&](StringRef Name, bool Value) {
- auto OldValue =
- CGF.CurFn->getFnAttribute(Name).getValueAsBool();
+ auto OldValue = CGF.CurFn->getFnAttribute(Name).getValueAsBool();
auto NewValue = OldValue & Value;
if (OldValue != NewValue)
CGF.CurFn->addFnAttr(Name, llvm::toStringRef(NewValue));
@@ -216,8 +218,8 @@ CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T,
IsKnownNonNull);
}
-LValue
-CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) {
+LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V,
+ QualType T) {
return ::makeNaturalAlignAddrLValue(V, T, /*ForPointeeType*/ true,
/*MightBeSigned*/ true, *this);
}
@@ -330,7 +332,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() {
// cleans up functions which started with a unified return block.
if (ReturnBlock.getBlock()->hasOneUse()) {
llvm::BranchInst *BI =
- dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->user_begin());
+ dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->user_begin());
if (BI && BI->isUnconditional() &&
BI->getSuccessor(0) == ReturnBlock.getBlock()) {
// Record/return the DebugLoc of the simple 'return' expression to be used
@@ -353,7 +355,8 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() {
}
static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) {
- if (!BB) return;
+ if (!BB)
+ return;
if (!BB->use_empty()) {
CGF.CurFn->insert(CGF.CurFn->end(), BB);
return;
@@ -375,9 +378,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
"mismatched push/pop in convergence stack!");
}
- bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0
- && NumSimpleReturnExprs == NumReturnExprs
- && ReturnBlock.getBlock()->use_empty();
+ bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0 &&
+ NumSimpleReturnExprs == NumReturnExprs &&
+ ReturnBlock.getBlock()->use_empty();
// Usually the return expression is evaluated before the cleanup
// code. If the function contains only a simple return statement,
// such as a constant, the location before the cleanup code becomes
@@ -445,8 +448,7 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
RetKeyInstructionsAtomGroup);
EmitEndEHSpec(CurCodeDecl);
- assert(EHStack.empty() &&
- "did not remove all scopes from cleanup stack!");
+ assert(EHStack.empty() && "did not remove all scopes from cleanup stack!");
// If someone did an indirect goto, emit the indirect goto block at the end of
// the function.
@@ -656,7 +658,8 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getXDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getYDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getZDim())))};
- Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs));
+ Fn->setMetadata("work_group_size_hint",
+ llvm::MDNode::get(Context, AttrMDArgs));
}
if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
@@ -667,7 +670,8 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getXDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getYDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getZDim())))};
- Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs));
+ Fn->setMetadata("reqd_work_group_size",
+ llvm::MDNode::get(Context, AttrMDArgs));
}
if (const OpenCLIntelReqdSubGroupSizeAttr *A =
@@ -680,7 +684,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
}
/// Determine whether the function F ends with a return stmt.
-static bool endsWithReturn(const Decl* F) {
+static bool endsWithReturn(const Decl *F) {
const Stmt *Body = nullptr;
if (auto *FD = dyn_cast_or_null<FunctionDecl>(F))
Body = FD->getBody();
@@ -870,7 +874,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
const IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0);
if (OMD->getMethodFamily() == OMF_dealloc ||
OMD->getMethodFamily() == OMF_initialize ||
- (OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) {
+ (OMD->getSelector().isUnarySelector() &&
+ II->isStr(".cxx_destruct"))) {
markAsIgnoreThreadCheckingAtRuntime(Fn);
}
}
@@ -1183,14 +1188,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (CGM.getCodeGenOpts().MNopMCount) {
if (!CGM.getCodeGenOpts().CallFEntry)
CGM.getDiags().Report(diag::err_opt_not_valid_without_opt)
- << "-mnop-mcount" << "-mfentry";
+ << "-mnop-mcount" << "-mfentry";
Fn->addFnAttr("mnop-mcount");
}
if (CGM.getCodeGenOpts().RecordMCount) {
if (!CGM.getCodeGenOpts().CallFEntry)
CGM.getDiags().Report(diag::err_opt_not_valid_without_opt)
- << "-mrecord-mcount" << "-mfentry";
+ << "-mrecord-mcount" << "-mfentry";
Fn->addFnAttr("mrecord-mcount");
}
}
@@ -1200,7 +1205,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (getContext().getTargetInfo().getTriple().getArch() !=
llvm::Triple::systemz)
CGM.getDiags().Report(diag::err_opt_not_valid_on_target)
- << "-mpacked-stack";
+ << "-mpacked-stack";
Fn->addFnAttr("packed-stack");
}
@@ -1237,8 +1242,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
unsigned Idx = CurFnInfo->getReturnInfo().getInAllocaFieldIndex();
llvm::Function::arg_iterator EI = CurFn->arg_end();
--EI;
- llvm::Value *Addr = Builder.CreateStructGEP(
- CurFnInfo->getArgStruct(), &*EI, Idx);
+ llvm::Value *Addr =
+ Builder.CreateStructGEP(CurFnInfo->getArgStruct(), &*EI, Idx);
llvm::Type *Ty =
cast<llvm::GetElementPtrInst>(Addr)->getResultElementType();
ReturnValuePointer = Address(Addr, Ty, getPointerAlign());
@@ -1251,8 +1256,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// Tell the epilog emitter to autorelease the result. We do this
// now so that various specialized functions can suppress it
// during their IR-generation.
- if (getLangOpts().ObjCAutoRefCount &&
- !CurFnInfo->isReturnsRetained() &&
+ if (getLangOpts().ObjCAutoRefCount && !CurFnInfo->isReturnsRetained() &&
RetTy->isObjCRetainableType())
AutoreleaseResult = true;
}
@@ -1272,7 +1276,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
}
}
- EmitFunctionProlog(*CurFnInfo, CurFn, Args);
+ EmitFunctionProlog(*CurFnInfo, CurFn, Args);
if (const CXXMethodDecl *MD = dyn_cast_if_present<CXXMethodDecl>(D);
MD && !MD->isStatic()) {
@@ -1291,12 +1295,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
MD->getParent()->getCaptureFields(LambdaCaptureFields,
LambdaThisCaptureField);
if (LambdaThisCaptureField) {
- // If the lambda captures the object referred to by '*this' - either by
- // value or by reference, make sure CXXThisValue points to the correct
- // object.
+ // If the lambda captures the object referred to by '*this' - either
+ // by value or by reference, make sure CXXThisValue points to the
+ // correct object.
- // Get the lvalue for the field (which is a copy of the enclosing object
- // or contains the address of the enclosing object).
+ // Get the lvalue for the field (which is a copy of the enclosing
+ // object or contains the address of the enclosing object).
LValue ThisFieldLValue =
EmitLValueForLambdaField(LambdaThisCaptureField);
if (!LambdaThisCaptureField->getType()->isPointerType()) {
@@ -1304,17 +1308,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// address. Sign this pointer.
CXXThisValue = ThisFieldLValue.getPointer(*this);
} else {
- // Load the lvalue pointed to by the field, since '*this' was captured
- // by reference.
- CXXThisValue =
- EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal();
+ // Load the lvalue pointed to by the field, since '*this' was
+ // captured by reference.
+ CXXThisValue = EmitLoadOfLValue(ThisFieldLValue, SourceLocation())
+ .getScalarVal();
}
}
for (auto *FD : MD->getParent()->fields()) {
if (FD->hasCapturedVLAType()) {
- auto *ExprArg = EmitLoadOfLValue(EmitLValueForLambdaField(FD),
- SourceLocation()).getScalarVal();
+ auto *ExprArg =
+ EmitLoadOfLValue(EmitLValueForLambdaField(FD), SourceLocation())
+ .getScalarVal();
auto VAT = FD->getCapturedVLAType();
VLASizeMap[VAT->getSizeExpr()] = ExprArg;
}
@@ -1416,7 +1421,8 @@ void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB,
static void TryMarkNoThrow(llvm::Function *F) {
// LLVM treats 'nounwind' on a function as part of the type, so we
// can't do this on functions that can be overwritten.
- if (F->isInterposable()) return;
+ if (F->isInterposable())
+ return;
for (llvm::BasicBlock &BB : *F)
for (llvm::Instruction &I : BB)
@@ -1584,8 +1590,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
EmitDestructorBody(Args);
else if (isa<CXXConstructorDecl>(FD))
EmitConstructorBody(Args);
- else if (getLangOpts().CUDA &&
- !getLangOpts().CUDAIsDevice &&
+ else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
FD->hasAttr<CUDAGlobalAttr>())
CGM.getCUDARuntime().emitDeviceStub(*this, Args);
else if (isa<CXXMethodDecl>(FD) &&
@@ -1680,7 +1685,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
/// that we can just remove the code.
bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
// Null statement, not a label!
- if (!S) return false;
+ if (!S)
+ return false;
// If this is a label, we have to emit the code, consider something like:
// if (0) { ... foo: bar(); } goto foo;
@@ -1712,7 +1718,8 @@ bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
/// inside of it, this is fine.
bool CodeGenFunction::containsBreak(const Stmt *S) {
// Null statement, not a label!
- if (!S) return false;
+ if (!S)
+ return false;
// If this is a switch or loop that defines its own break scope, then we can
// include it and anything inside of it.
@@ -1732,7 +1739,8 @@ bool CodeGenFunction::containsBreak(const Stmt *S) {
}
bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) {
- if (!S) return false;
+ if (!S)
+ return false;
// Some statement kinds add a scope and thus never add a decl to the current
// scope. Note, this list is longer than the list of statements that might
@@ -1785,11 +1793,11 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond,
// to bool.
Expr::EvalResult Result;
if (!Cond->EvaluateAsInt(Result, getContext()))
- return false; // Not foldable, not integer or not fully evaluatable.
+ return false; // Not foldable, not integer or not fully evaluatable.
llvm::APSInt Int = Result.Val.getInt();
if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond))
- return false; // Contains a label.
+ return false; // Contains a label.
PGO->markStmtMaybeUsed(Cond);
ResultInt = Int;
@@ -2086,7 +2094,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(
// br(c ? throw x : y, t, f) -> br(c, br(throw x, t, f), br(y, t, f)
// Fold this to:
// br(c, throw x, br(y, t, f))
- EmitCXXThrowExpr(Throw, /*KeepInsertionPoint*/false);
+ EmitCXXThrowExpr(Throw, /*KeepInsertionPoint*/ false);
return;
}
@@ -2187,8 +2195,8 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
CGBuilderTy &Builder = CGF.Builder;
CharUnits baseSize = CGF.getContext().getTypeSizeInChars(baseType);
- llvm::Value *baseSizeInChars
- = llvm::ConstantInt::get(CGF.IntPtrTy, baseSize.getQuantity());
+ llvm::Value *baseSizeInChars =
+ llvm::ConstantInt::get(CGF.IntPtrTy, baseSize.getQuantity());
Address begin = dest.withElementType(CGF.Int8Ty);
llvm::Value *end = Builder.CreateInBoundsGEP(begin.getElementType(),
@@ -2206,8 +2214,7 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
llvm::PHINode *cur = Builder.CreatePHI(begin.getType(), 2, "vla.cur");
cur->addIncoming(begin.emitRawPointer(CGF), originBB);
- CharUnits curAlign =
- dest.getAlignment().alignmentOfArrayElement(baseSize);
+ CharUnits curAlign = dest.getAlignment().alignmentOfArrayElement(baseSize);
// memcpy the individual element bit-pattern.
Builder.CreateMemCpy(Address(cur, CGF.Int8Ty, curAlign), src, baseSizeInChars,
@@ -2215,7 +2222,7 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
// Go to the next element.
llvm::Value *next =
- Builder.CreateInBoundsGEP(CGF.Int8Ty, cur, baseSizeInChars, "vla.next");
+ Builder.CreateInBoundsGEP(CGF.Int8Ty, cur, baseSizeInChars, "vla.next");
// Leave if that's the end of the VLA.
llvm::Value *done = Builder.CreateICmpEQ(next, end, "vla-init.isdone");
@@ -2225,8 +2232,7 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
CGF.EmitBlock(contBB);
}
-void
-CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
+void CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
// Ignore empty classes in C++.
if (getLangOpts().CPlusPlus)
if (const auto *RD = Ty->getAsCXXRecordDecl(); RD && RD->isEmpty())
@@ -2244,9 +2250,8 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
// Don't bother emitting a zero-byte memset.
if (size.isZero()) {
// But note that getTypeInfo returns 0 for a VLA.
- if (const VariableArrayType *vlaType =
- dyn_cast_or_null<VariableArrayType>(
- getContext().getAsArrayType(Ty))) {
+ if (const VariableArrayType *vlaType = dyn_cast_or_null<VariableArrayType>(
+ getContext().getAsArrayType(Ty))) {
auto VlaSize = getVLASize(vlaType);
SizeVal = VlaSize.NumElts;
CharUnits eltSize = getContext().getTypeSizeInChars(VlaSize.Type);
@@ -2267,20 +2272,21 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
// like -1, which happens to be the pattern used by member-pointers.
if (!CGM.getTypes().isZeroInitializable(Ty)) {
// For a VLA, emit a single element, then splat that over the VLA.
- if (vla) Ty = getContext().getBaseElementType(vla);
+ if (vla)
+ Ty = getContext().getBaseElementType(vla);
llvm::Constant *NullConstant = CGM.EmitNullConstant(Ty);
- llvm::GlobalVariable *NullVariable =
- new llvm::GlobalVariable(CGM.getModule(), NullConstant->getType(),
- /*isConstant=*/true,
- llvm::GlobalVariable::PrivateLinkage,
- NullConstant, Twine());
+ llvm::GlobalVariable *NullVariable = new llvm::GlobalVariable(
+ CGM.getModule(), NullConstant->getType(),
+ /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, NullConstant,
+ Twine());
CharUnits NullAlign = DestPtr.getAlignment();
NullVariable->setAlignment(NullAlign.getAsAlign());
Address SrcPtr(NullVariable, Builder.getInt8Ty(), NullAlign);
- if (vla) return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal);
+ if (vla)
+ return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal);
// Get and call the appropriate llvm.memcpy overload.
Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, false);
@@ -2307,13 +2313,14 @@ llvm::BlockAddress *CodeGenFunction::GetAddrOfLabel(const LabelDecl *L) {
llvm::BasicBlock *CodeGenFunction::GetIndirectGotoBlock() {
// If we already made the indirect branch for indirect goto, return its block.
- if (IndirectBranch) return IndirectBranch->getParent();
+ if (IndirectBranch)
+ return IndirectBranch->getParent();
CGBuilderTy TmpBuilder(*this, createBasicBlock("indirectgoto"));
// Create the PHI node that indirect gotos will add entries to.
- llvm::Value *DestVal = TmpBuilder.CreatePHI(Int8PtrTy, 0,
- "indirect.goto.dest");
+ llvm::Value *DestVal =
+ TmpBuilder.CreatePHI(Int8PtrTy, 0, "indirect.goto.dest");
// Create the indirect branch instruction.
IndirectBranch = TmpBuilder.CreateIndirectBr(DestVal);
@@ -2353,7 +2360,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
// We have some number of constant-length arrays, so addr should
// have LLVM type [M x [N x [...]]]*. Build a GEP that walks
// down to the first element of addr.
- SmallVector<llvm::Value*, 8> gepIndices;
+ SmallVector<llvm::Value *, 8> gepIndices;
// GEP down to the array type.
llvm::ConstantInt *zero = Builder.getInt32(0);
@@ -2363,7 +2370,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
QualType eltType;
llvm::ArrayType *llvmArrayType =
- dyn_cast<llvm::ArrayType>(addr.getElementType());
+ dyn_cast<llvm::ArrayType>(addr.getElementType());
while (llvmArrayType) {
assert(isa<ConstantArrayType>(arrayType));
assert(cast<ConstantArrayType>(arrayType)->getZExtSize() ==
@@ -2373,8 +2380,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
countFromCLAs *= llvmArrayType->getNumElements();
eltType = arrayType->getElementType();
- llvmArrayType =
- dyn_cast<llvm::ArrayType>(llvmArrayType->getElementType());
+ llvmArrayType = dyn_cast<llvm::ArrayType>(llvmArrayType->getElementType());
arrayType = getContext().getAsArrayType(arrayType->getElementType());
assert((!llvmArrayType || arrayType) &&
"LLVM and Clang types are out-of-synch");
@@ -2402,8 +2408,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
baseType = eltType;
- llvm::Value *numElements
- = llvm::ConstantInt::get(SizeTy, countFromCLAs);
+ llvm::Value *numElements = llvm::ConstantInt::get(SizeTy, countFromCLAs);
// If we had any VLA dimensions, factor them in.
if (numVLAElements)
@@ -2439,11 +2444,10 @@ CodeGenFunction::getVLASize(const VariableArrayType *type) {
}
} while ((type = getContext().getAsVariableArrayType(elementType)));
- return { numElements, elementType };
+ return {numElements, elementType};
}
-CodeGenFunction::VlaSizePair
-CodeGenFunction::getVLAElements1D(QualType type) {
+CodeGenFunction::VlaSizePair CodeGenFunction::getVLAElements1D(QualType type) {
const VariableArrayType *vla = getContext().getAsVariableArrayType(type);
assert(vla && "type was not a variable array type!");
return getVLAElements1D(vla);
@@ -2454,7 +2458,7 @@ CodeGenFunction::getVLAElements1D(const VariableArrayType *Vla) {
llvm::Value *VlaSize = VLASizeMap[Vla->getSizeExpr()];
assert(VlaSize && "no size for VLA!");
assert(VlaSize->getType() == SizeTy);
- return { VlaSize, Vla->getElementType() };
+ return {VlaSize, Vla->getElementType()};
}
void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
@@ -2616,7 +2620,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
} while (type->isVariablyModifiedType());
}
-Address CodeGenFunction::EmitVAListRef(const Expr* E) {
+Address CodeGenFunction::EmitVAListRef(const Expr *E) {
if (getContext().getBuiltinVaListType()->isArrayType())
return EmitPointerWithAlignment(E);
return EmitLValue(E).getAddress();
@@ -2640,9 +2644,11 @@ CodeGenFunction::protectFromPeepholes(RValue rvalue) {
// is trunc(zext) folding, but if we add more, we can easily
// extend this protection.
- if (!rvalue.isScalar()) return PeepholeProtection();
+ if (!rvalue.isScalar())
+ return PeepholeProtection();
llvm::Value *value = rvalue.getScalarVal();
- if (!isa<llvm::ZExtInst>(value)) return PeepholeProtection();
+ if (!isa<llvm::ZExtInst>(value))
+ return PeepholeProtection();
// Just make an extra bitcast.
assert(HaveInsertPoint());
@@ -2655,7 +2661,8 @@ CodeGenFunction::protectFromPeepholes(RValue rvalue) {
}
void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) {
- if (!protection.Inst) return;
+ if (!protection.Inst)
+ return;
// In theory, we could try to duplicate the peepholes now, but whatever.
protection.Inst->eraseFromParent();
@@ -2762,7 +2769,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D,
return Address(V, Addr.getElementType(), Addr.getAlignment());
}
-CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() { }
+CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() {}
CodeGenFunction::SanitizerScope::SanitizerScope(CodeGenFunction *CGF)
: CGF(CGF) {
@@ -2845,12 +2852,13 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
// referenced by an accelerator executable function, we emit an error.
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
if (BuiltinID) {
- StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
- if (!Builtin::evaluateRequiredTargetFeatures(
- FeatureList, CallerFeatureMap) && !IsHipStdPar) {
+ StringRef FeatureList(
+ CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
+ if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
+ CallerFeatureMap) &&
+ !IsHipStdPar) {
CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
- << TargetDecl->getDeclName()
- << FeatureList;
+ << TargetDecl->getDeclName() << FeatureList;
}
} else if (!TargetDecl->isMultiVersion() &&
TargetDecl->hasAttr<TargetAttr>()) {
>From b986c7407983672eef4e1c2d80af8ae16921d92d Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Wed, 29 Oct 2025 20:28:02 +0700
Subject: [PATCH 3/8] [Clang][CodeGen] Analyze captures of naked lambdas
Naked lambdas now have their captures analyzed and the capture-field metadata
populated, even though the captures cannot be accessed from generated code,
since naked functions have no prologue.
This matches GCC's acceptance of naked lambdas with captures.
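A sketch of the accepted pattern, mirroring the new capture tests (the
function name is illustrative): the capture list is analyzed and recorded,
but no IR is emitted to read `x`; a naked body can only reach its captures
through hand-written assembly.

    void capture_example() {
      int x = 42;
      auto l = [x]() __attribute__((naked)) {
        asm volatile("retq");  // cannot reference 'x' from generated IR
      };
      l();
    }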
---
clang/lib/CodeGen/CodeGenFunction.cpp | 7 +++++
.../CodeGenCXX/naked-lambda-capture-multi.cpp | 18 ++++++++++++
.../CodeGenCXX/naked-lambda-capture-this.cpp | 28 +++++++++++++++++++
.../CodeGenCXX/naked-lambda-capture-var.cpp | 21 ++++++++++++++
clang/test/CodeGenCXX/naked-lambda.cpp | 2 --
5 files changed, 74 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGenCXX/naked-lambda-capture-multi.cpp
create mode 100644 clang/test/CodeGenCXX/naked-lambda-capture-this.cpp
create mode 100644 clang/test/CodeGenCXX/naked-lambda-capture-var.cpp
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index a4affa30f491a..fafcdbeeddbc7 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1330,6 +1330,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// fast register allocator would be happier...
CXXThisValue = CXXABIThisValue;
}
+ } else if (IsInLambda && MD->isImplicitObjectMemberFunction()) {
+ // Populate capture fields metadata for analysis. We skip
+ // EmitInstanceProlog to avoid emitting prologue code.
+ // FIXME: Naked functions cannot access captures via LLVM IR; any access
+ // must be done manually in inline assembly.
+ MD->getParent()->getCaptureFields(LambdaCaptureFields,
+ LambdaThisCaptureField);
}
// Check the 'this' pointer once per function, if it's available.
diff --git a/clang/test/CodeGenCXX/naked-lambda-capture-multi.cpp b/clang/test/CodeGenCXX/naked-lambda-capture-multi.cpp
new file mode 100644
index 0000000000000..de52d6dec77e9
--- /dev/null
+++ b/clang/test/CodeGenCXX/naked-lambda-capture-multi.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+void test_naked_lambda_capture_multi() {
+ int x = 42;
+ int y = 100;
+ auto l = [&x, y]() __attribute__((naked)) {
+ asm volatile("retq");
+ };
+ l();
+}
+
+// CHECK-LABEL: define {{.*}} @"_ZZ31test_naked_lambda_capture_multivENK3$_0clEv"
+// CHECK-NOT: load i32
+// CHECK-NOT: load ptr
+// CHECK-NOT: getelementptr
+// CHECK-NOT: alloca
+// CHECK: call void asm sideeffect "retq"
+// CHECK-NEXT: unreachable
diff --git a/clang/test/CodeGenCXX/naked-lambda-capture-this.cpp b/clang/test/CodeGenCXX/naked-lambda-capture-this.cpp
new file mode 100644
index 0000000000000..c6bcd2f83b8eb
--- /dev/null
+++ b/clang/test/CodeGenCXX/naked-lambda-capture-this.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S %s -o - | FileCheck %s --check-prefix=ASM
+
+struct S {
+ int member;
+ void test_naked_lambda_capture_this() {
+ auto l = [this]() __attribute__((naked)) {
+ asm volatile("retq");
+ };
+ l();
+ }
+};
+
+void test() {
+ S s;
+ s.test_naked_lambda_capture_this();
+}
+
+// CHECK-LABEL: define {{.*}} @_ZZN1S30test_naked_lambda_capture_thisEvENKUlvE_clEv
+// CHECK-NOT: load ptr
+// CHECK-NOT: getelementptr
+// CHECK-NOT: alloca
+// CHECK: call void asm sideeffect "retq"
+
+// ASM-LABEL: _ZZN1S30test_naked_lambda_capture_thisEvENKUlvE_clEv:
+// ASM-NOT: push
+// ASM-NOT: mov
+// ASM: retq
diff --git a/clang/test/CodeGenCXX/naked-lambda-capture-var.cpp b/clang/test/CodeGenCXX/naked-lambda-capture-var.cpp
new file mode 100644
index 0000000000000..543c38bd5ab45
--- /dev/null
+++ b/clang/test/CodeGenCXX/naked-lambda-capture-var.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S %s -o - | FileCheck %s --check-prefix=ASM
+
+void test_naked_lambda_capture_var() {
+ int x = 42;
+ auto l = [x]() __attribute__((naked)) {
+ asm volatile("retq");
+ };
+ l();
+}
+
+// CHECK-LABEL: define {{.*}} @"_ZZ29test_naked_lambda_capture_varvENK3$_0clEv"
+// CHECK-NOT: load i32
+// CHECK-NOT: alloca
+// CHECK-NOT: getelementptr
+// CHECK: call void asm sideeffect "retq"
+
+// ASM-LABEL: _ZZ29test_naked_lambda_capture_varvENK3$_0clEv:
+// ASM-NOT: push
+// ASM-NOT: mov
+// ASM: retq
diff --git a/clang/test/CodeGenCXX/naked-lambda.cpp b/clang/test/CodeGenCXX/naked-lambda.cpp
index 27ea1af09da01..c24699495c0cf 100644
--- a/clang/test/CodeGenCXX/naked-lambda.cpp
+++ b/clang/test/CodeGenCXX/naked-lambda.cpp
@@ -16,5 +16,3 @@ void test_naked_lambda() {
// ASM-NOT: push
// ASM-NOT: pop
// ASM: retq
-
-
>From 6e341cbcce044ae14582310b9e23bcbdc74d4138 Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Wed, 29 Oct 2025 20:42:01 +0700
Subject: [PATCH 4/8] Revert file back
---
clang/lib/CodeGen/CodeGenFunction.cpp | 243 ++++++++++++--------------
1 file changed, 109 insertions(+), 134 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index fafcdbeeddbc7..88628530cf66b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -46,7 +46,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CRC.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -115,12 +114,9 @@ llvm::fp::ExceptionBehavior
clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
switch (Kind) {
- case LangOptions::FPE_Ignore:
- return llvm::fp::ebIgnore;
- case LangOptions::FPE_MayTrap:
- return llvm::fp::ebMayTrap;
- case LangOptions::FPE_Strict:
- return llvm::fp::ebStrict;
+ case LangOptions::FPE_Ignore: return llvm::fp::ebIgnore;
+ case LangOptions::FPE_MayTrap: return llvm::fp::ebMayTrap;
+ case LangOptions::FPE_Strict: return llvm::fp::ebStrict;
default:
llvm_unreachable("Unsupported FP Exception Behavior");
}
@@ -178,7 +174,8 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) {
"FPConstrained should be enabled on entire function");
auto mergeFnAttrValue = [&](StringRef Name, bool Value) {
- auto OldValue = CGF.CurFn->getFnAttribute(Name).getValueAsBool();
+ auto OldValue =
+ CGF.CurFn->getFnAttribute(Name).getValueAsBool();
auto NewValue = OldValue & Value;
if (OldValue != NewValue)
CGF.CurFn->addFnAttr(Name, llvm::toStringRef(NewValue));
@@ -218,8 +215,8 @@ CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T,
IsKnownNonNull);
}
-LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V,
- QualType T) {
+LValue
+CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) {
return ::makeNaturalAlignAddrLValue(V, T, /*ForPointeeType*/ true,
/*MightBeSigned*/ true, *this);
}
@@ -332,7 +329,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() {
// cleans up functions which started with a unified return block.
if (ReturnBlock.getBlock()->hasOneUse()) {
llvm::BranchInst *BI =
- dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->user_begin());
+ dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->user_begin());
if (BI && BI->isUnconditional() &&
BI->getSuccessor(0) == ReturnBlock.getBlock()) {
// Record/return the DebugLoc of the simple 'return' expression to be used
@@ -355,8 +352,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() {
}
static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) {
- if (!BB)
- return;
+ if (!BB) return;
if (!BB->use_empty()) {
CGF.CurFn->insert(CGF.CurFn->end(), BB);
return;
@@ -378,9 +374,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
"mismatched push/pop in convergence stack!");
}
- bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0 &&
- NumSimpleReturnExprs == NumReturnExprs &&
- ReturnBlock.getBlock()->use_empty();
+ bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0
+ && NumSimpleReturnExprs == NumReturnExprs
+ && ReturnBlock.getBlock()->use_empty();
// Usually the return expression is evaluated before the cleanup
// code. If the function contains only a simple return statement,
// such as a constant, the location before the cleanup code becomes
@@ -448,7 +444,8 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
RetKeyInstructionsAtomGroup);
EmitEndEHSpec(CurCodeDecl);
- assert(EHStack.empty() && "did not remove all scopes from cleanup stack!");
+ assert(EHStack.empty() &&
+ "did not remove all scopes from cleanup stack!");
// If someone did an indirect goto, emit the indirect goto block at the end of
// the function.
@@ -658,8 +655,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getXDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getYDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getZDim())))};
- Fn->setMetadata("work_group_size_hint",
- llvm::MDNode::get(Context, AttrMDArgs));
+ Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs));
}
if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
@@ -670,8 +666,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getXDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getYDim()))),
llvm::ConstantAsMetadata::get(Builder.getInt32(Eval(A->getZDim())))};
- Fn->setMetadata("reqd_work_group_size",
- llvm::MDNode::get(Context, AttrMDArgs));
+ Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs));
}
if (const OpenCLIntelReqdSubGroupSizeAttr *A =
@@ -684,7 +679,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
}
/// Determine whether the function F ends with a return stmt.
-static bool endsWithReturn(const Decl *F) {
+static bool endsWithReturn(const Decl* F) {
const Stmt *Body = nullptr;
if (auto *FD = dyn_cast_or_null<FunctionDecl>(F))
Body = FD->getBody();
@@ -874,8 +869,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
const IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0);
if (OMD->getMethodFamily() == OMF_dealloc ||
OMD->getMethodFamily() == OMF_initialize ||
- (OMD->getSelector().isUnarySelector() &&
- II->isStr(".cxx_destruct"))) {
+ (OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) {
markAsIgnoreThreadCheckingAtRuntime(Fn);
}
}
@@ -1188,14 +1182,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (CGM.getCodeGenOpts().MNopMCount) {
if (!CGM.getCodeGenOpts().CallFEntry)
CGM.getDiags().Report(diag::err_opt_not_valid_without_opt)
- << "-mnop-mcount" << "-mfentry";
+ << "-mnop-mcount" << "-mfentry";
Fn->addFnAttr("mnop-mcount");
}
if (CGM.getCodeGenOpts().RecordMCount) {
if (!CGM.getCodeGenOpts().CallFEntry)
CGM.getDiags().Report(diag::err_opt_not_valid_without_opt)
- << "-mrecord-mcount" << "-mfentry";
+ << "-mrecord-mcount" << "-mfentry";
Fn->addFnAttr("mrecord-mcount");
}
}
@@ -1205,7 +1199,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (getContext().getTargetInfo().getTriple().getArch() !=
llvm::Triple::systemz)
CGM.getDiags().Report(diag::err_opt_not_valid_on_target)
- << "-mpacked-stack";
+ << "-mpacked-stack";
Fn->addFnAttr("packed-stack");
}
@@ -1242,8 +1236,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
unsigned Idx = CurFnInfo->getReturnInfo().getInAllocaFieldIndex();
llvm::Function::arg_iterator EI = CurFn->arg_end();
--EI;
- llvm::Value *Addr =
- Builder.CreateStructGEP(CurFnInfo->getArgStruct(), &*EI, Idx);
+ llvm::Value *Addr = Builder.CreateStructGEP(
+ CurFnInfo->getArgStruct(), &*EI, Idx);
llvm::Type *Ty =
cast<llvm::GetElementPtrInst>(Addr)->getResultElementType();
ReturnValuePointer = Address(Addr, Ty, getPointerAlign());
@@ -1256,7 +1250,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// Tell the epilog emitter to autorelease the result. We do this
// now so that various specialized functions can suppress it
// during their IR-generation.
- if (getLangOpts().ObjCAutoRefCount && !CurFnInfo->isReturnsRetained() &&
+ if (getLangOpts().ObjCAutoRefCount &&
+ !CurFnInfo->isReturnsRetained() &&
RetTy->isObjCRetainableType())
AutoreleaseResult = true;
}
@@ -1282,61 +1277,44 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
MD && !MD->isStatic()) {
bool IsInLambda =
MD->getParent()->isLambda() && MD->getOverloadedOperator() == OO_Call;
-
- const FunctionDecl *FD = dyn_cast_if_present<FunctionDecl>(D);
- bool IsNaked = FD && FD->hasAttr<NakedAttr>();
-
- if (!IsNaked) {
- if (MD->isImplicitObjectMemberFunction())
- CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
-
- if (IsInLambda) {
- // We're in a lambda; figure out the captures.
- MD->getParent()->getCaptureFields(LambdaCaptureFields,
- LambdaThisCaptureField);
- if (LambdaThisCaptureField) {
- // If the lambda captures the object referred to by '*this' - either
- // by value or by reference, make sure CXXThisValue points to the
- // correct object.
-
- // Get the lvalue for the field (which is a copy of the enclosing
- // object or contains the address of the enclosing object).
- LValue ThisFieldLValue =
- EmitLValueForLambdaField(LambdaThisCaptureField);
- if (!LambdaThisCaptureField->getType()->isPointerType()) {
- // If the enclosing object was captured by value, just use its
- // address. Sign this pointer.
- CXXThisValue = ThisFieldLValue.getPointer(*this);
- } else {
- // Load the lvalue pointed to by the field, since '*this' was
- // captured by reference.
- CXXThisValue = EmitLoadOfLValue(ThisFieldLValue, SourceLocation())
- .getScalarVal();
- }
+ if (MD->isImplicitObjectMemberFunction())
+ CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
+ if (IsInLambda) {
+ // We're in a lambda; figure out the captures.
+ MD->getParent()->getCaptureFields(LambdaCaptureFields,
+ LambdaThisCaptureField);
+ if (LambdaThisCaptureField) {
+ // If the lambda captures the object referred to by '*this' - either by
+ // value or by reference, make sure CXXThisValue points to the correct
+ // object.
+
+ // Get the lvalue for the field (which is a copy of the enclosing object
+ // or contains the address of the enclosing object).
+ LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField);
+ if (!LambdaThisCaptureField->getType()->isPointerType()) {
+ // If the enclosing object was captured by value, just use its
+ // address. Sign this pointer.
+ CXXThisValue = ThisFieldLValue.getPointer(*this);
+ } else {
+ // Load the lvalue pointed to by the field, since '*this' was captured
+ // by reference.
+ CXXThisValue =
+ EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal();
}
-
- for (auto *FD : MD->getParent()->fields()) {
- if (FD->hasCapturedVLAType()) {
- auto *ExprArg =
- EmitLoadOfLValue(EmitLValueForLambdaField(FD), SourceLocation())
- .getScalarVal();
- auto VAT = FD->getCapturedVLAType();
- VLASizeMap[VAT->getSizeExpr()] = ExprArg;
- }
+ }
+ for (auto *FD : MD->getParent()->fields()) {
+ if (FD->hasCapturedVLAType()) {
+ auto *ExprArg = EmitLoadOfLValue(EmitLValueForLambdaField(FD),
+ SourceLocation()).getScalarVal();
+ auto VAT = FD->getCapturedVLAType();
+ VLASizeMap[VAT->getSizeExpr()] = ExprArg;
}
- } else if (MD->isImplicitObjectMemberFunction()) {
- // Not in a lambda; just use 'this' from the method.
- // FIXME: Should we generate a new load for each use of 'this'? The
- // fast register allocator would be happier...
- CXXThisValue = CXXABIThisValue;
}
- } else if (IsInLambda && MD->isImplicitObjectMemberFunction()) {
- // Populate capture fields metadata for analysis. We skip
- // EmitInstanceProlog to avoid emitting prologue code.
- // FIXME: Naked functions cannot access captures via LLVM IR; any access
- // must be done manually in inline assembly.
- MD->getParent()->getCaptureFields(LambdaCaptureFields,
- LambdaThisCaptureField);
+ } else if (MD->isImplicitObjectMemberFunction()) {
+ // Not in a lambda; just use 'this' from the method.
+ // FIXME: Should we generate a new load for each use of 'this'? The
+ // fast register allocator would be happier...
+ CXXThisValue = CXXABIThisValue;
}
// Check the 'this' pointer once per function, if it's available.
@@ -1428,8 +1406,7 @@ void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB,
static void TryMarkNoThrow(llvm::Function *F) {
// LLVM treats 'nounwind' on a function as part of the type, so we
// can't do this on functions that can be overwritten.
- if (F->isInterposable())
- return;
+ if (F->isInterposable()) return;
for (llvm::BasicBlock &BB : *F)
for (llvm::Instruction &I : BB)
@@ -1597,7 +1574,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
EmitDestructorBody(Args);
else if (isa<CXXConstructorDecl>(FD))
EmitConstructorBody(Args);
- else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
+ else if (getLangOpts().CUDA &&
+ !getLangOpts().CUDAIsDevice &&
FD->hasAttr<CUDAGlobalAttr>())
CGM.getCUDARuntime().emitDeviceStub(*this, Args);
else if (isa<CXXMethodDecl>(FD) &&
@@ -1692,8 +1670,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
/// that we can just remove the code.
bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
// Null statement, not a label!
- if (!S)
- return false;
+ if (!S) return false;
// If this is a label, we have to emit the code, consider something like:
// if (0) { ... foo: bar(); } goto foo;
@@ -1725,8 +1702,7 @@ bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
/// inside of it, this is fine.
bool CodeGenFunction::containsBreak(const Stmt *S) {
// Null statement, not a label!
- if (!S)
- return false;
+ if (!S) return false;
// If this is a switch or loop that defines its own break scope, then we can
// include it and anything inside of it.
@@ -1746,8 +1722,7 @@ bool CodeGenFunction::containsBreak(const Stmt *S) {
}
bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) {
- if (!S)
- return false;
+ if (!S) return false;
// Some statement kinds add a scope and thus never add a decl to the current
// scope. Note, this list is longer than the list of statements that might
@@ -1800,11 +1775,11 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond,
// to bool.
Expr::EvalResult Result;
if (!Cond->EvaluateAsInt(Result, getContext()))
- return false; // Not foldable, not integer or not fully evaluatable.
+ return false; // Not foldable, not integer or not fully evaluatable.
llvm::APSInt Int = Result.Val.getInt();
if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond))
- return false; // Contains a label.
+ return false; // Contains a label.
PGO->markStmtMaybeUsed(Cond);
ResultInt = Int;
@@ -2101,7 +2076,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(
// br(c ? throw x : y, t, f) -> br(c, br(throw x, t, f), br(y, t, f)
// Fold this to:
// br(c, throw x, br(y, t, f))
- EmitCXXThrowExpr(Throw, /*KeepInsertionPoint*/ false);
+ EmitCXXThrowExpr(Throw, /*KeepInsertionPoint*/false);
return;
}
@@ -2202,8 +2177,8 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
CGBuilderTy &Builder = CGF.Builder;
CharUnits baseSize = CGF.getContext().getTypeSizeInChars(baseType);
- llvm::Value *baseSizeInChars =
- llvm::ConstantInt::get(CGF.IntPtrTy, baseSize.getQuantity());
+ llvm::Value *baseSizeInChars
+ = llvm::ConstantInt::get(CGF.IntPtrTy, baseSize.getQuantity());
Address begin = dest.withElementType(CGF.Int8Ty);
llvm::Value *end = Builder.CreateInBoundsGEP(begin.getElementType(),
@@ -2221,7 +2196,8 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
llvm::PHINode *cur = Builder.CreatePHI(begin.getType(), 2, "vla.cur");
cur->addIncoming(begin.emitRawPointer(CGF), originBB);
- CharUnits curAlign = dest.getAlignment().alignmentOfArrayElement(baseSize);
+ CharUnits curAlign =
+ dest.getAlignment().alignmentOfArrayElement(baseSize);
// memcpy the individual element bit-pattern.
Builder.CreateMemCpy(Address(cur, CGF.Int8Ty, curAlign), src, baseSizeInChars,
@@ -2229,7 +2205,7 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
// Go to the next element.
llvm::Value *next =
- Builder.CreateInBoundsGEP(CGF.Int8Ty, cur, baseSizeInChars, "vla.next");
+ Builder.CreateInBoundsGEP(CGF.Int8Ty, cur, baseSizeInChars, "vla.next");
// Leave if that's the end of the VLA.
llvm::Value *done = Builder.CreateICmpEQ(next, end, "vla-init.isdone");
@@ -2239,7 +2215,8 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType,
CGF.EmitBlock(contBB);
}
-void CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
+void
+CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
// Ignore empty classes in C++.
if (getLangOpts().CPlusPlus)
if (const auto *RD = Ty->getAsCXXRecordDecl(); RD && RD->isEmpty())
@@ -2257,8 +2234,9 @@ void CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
// Don't bother emitting a zero-byte memset.
if (size.isZero()) {
// But note that getTypeInfo returns 0 for a VLA.
- if (const VariableArrayType *vlaType = dyn_cast_or_null<VariableArrayType>(
- getContext().getAsArrayType(Ty))) {
+ if (const VariableArrayType *vlaType =
+ dyn_cast_or_null<VariableArrayType>(
+ getContext().getAsArrayType(Ty))) {
auto VlaSize = getVLASize(vlaType);
SizeVal = VlaSize.NumElts;
CharUnits eltSize = getContext().getTypeSizeInChars(VlaSize.Type);
@@ -2279,21 +2257,20 @@ void CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
// like -1, which happens to be the pattern used by member-pointers.
if (!CGM.getTypes().isZeroInitializable(Ty)) {
// For a VLA, emit a single element, then splat that over the VLA.
- if (vla)
- Ty = getContext().getBaseElementType(vla);
+ if (vla) Ty = getContext().getBaseElementType(vla);
llvm::Constant *NullConstant = CGM.EmitNullConstant(Ty);
- llvm::GlobalVariable *NullVariable = new llvm::GlobalVariable(
- CGM.getModule(), NullConstant->getType(),
- /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, NullConstant,
- Twine());
+ llvm::GlobalVariable *NullVariable =
+ new llvm::GlobalVariable(CGM.getModule(), NullConstant->getType(),
+ /*isConstant=*/true,
+ llvm::GlobalVariable::PrivateLinkage,
+ NullConstant, Twine());
CharUnits NullAlign = DestPtr.getAlignment();
NullVariable->setAlignment(NullAlign.getAsAlign());
Address SrcPtr(NullVariable, Builder.getInt8Ty(), NullAlign);
- if (vla)
- return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal);
+ if (vla) return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal);
// Get and call the appropriate llvm.memcpy overload.
Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, false);
@@ -2320,14 +2297,13 @@ llvm::BlockAddress *CodeGenFunction::GetAddrOfLabel(const LabelDecl *L) {
llvm::BasicBlock *CodeGenFunction::GetIndirectGotoBlock() {
// If we already made the indirect branch for indirect goto, return its block.
- if (IndirectBranch)
- return IndirectBranch->getParent();
+ if (IndirectBranch) return IndirectBranch->getParent();
CGBuilderTy TmpBuilder(*this, createBasicBlock("indirectgoto"));
// Create the PHI node that indirect gotos will add entries to.
- llvm::Value *DestVal =
- TmpBuilder.CreatePHI(Int8PtrTy, 0, "indirect.goto.dest");
+ llvm::Value *DestVal = TmpBuilder.CreatePHI(Int8PtrTy, 0,
+ "indirect.goto.dest");
// Create the indirect branch instruction.
IndirectBranch = TmpBuilder.CreateIndirectBr(DestVal);
@@ -2367,7 +2343,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
// We have some number of constant-length arrays, so addr should
// have LLVM type [M x [N x [...]]]*. Build a GEP that walks
// down to the first element of addr.
- SmallVector<llvm::Value *, 8> gepIndices;
+ SmallVector<llvm::Value*, 8> gepIndices;
// GEP down to the array type.
llvm::ConstantInt *zero = Builder.getInt32(0);
@@ -2377,7 +2353,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
QualType eltType;
llvm::ArrayType *llvmArrayType =
- dyn_cast<llvm::ArrayType>(addr.getElementType());
+ dyn_cast<llvm::ArrayType>(addr.getElementType());
while (llvmArrayType) {
assert(isa<ConstantArrayType>(arrayType));
assert(cast<ConstantArrayType>(arrayType)->getZExtSize() ==
@@ -2387,7 +2363,8 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
countFromCLAs *= llvmArrayType->getNumElements();
eltType = arrayType->getElementType();
- llvmArrayType = dyn_cast<llvm::ArrayType>(llvmArrayType->getElementType());
+ llvmArrayType =
+ dyn_cast<llvm::ArrayType>(llvmArrayType->getElementType());
arrayType = getContext().getAsArrayType(arrayType->getElementType());
assert((!llvmArrayType || arrayType) &&
"LLVM and Clang types are out-of-synch");
@@ -2415,7 +2392,8 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType,
baseType = eltType;
- llvm::Value *numElements = llvm::ConstantInt::get(SizeTy, countFromCLAs);
+ llvm::Value *numElements
+ = llvm::ConstantInt::get(SizeTy, countFromCLAs);
// If we had any VLA dimensions, factor them in.
if (numVLAElements)
@@ -2451,10 +2429,11 @@ CodeGenFunction::getVLASize(const VariableArrayType *type) {
}
} while ((type = getContext().getAsVariableArrayType(elementType)));
- return {numElements, elementType};
+ return { numElements, elementType };
}
-CodeGenFunction::VlaSizePair CodeGenFunction::getVLAElements1D(QualType type) {
+CodeGenFunction::VlaSizePair
+CodeGenFunction::getVLAElements1D(QualType type) {
const VariableArrayType *vla = getContext().getAsVariableArrayType(type);
assert(vla && "type was not a variable array type!");
return getVLAElements1D(vla);
@@ -2465,7 +2444,7 @@ CodeGenFunction::getVLAElements1D(const VariableArrayType *Vla) {
llvm::Value *VlaSize = VLASizeMap[Vla->getSizeExpr()];
assert(VlaSize && "no size for VLA!");
assert(VlaSize->getType() == SizeTy);
- return {VlaSize, Vla->getElementType()};
+ return { VlaSize, Vla->getElementType() };
}
void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
@@ -2627,7 +2606,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
} while (type->isVariablyModifiedType());
}
-Address CodeGenFunction::EmitVAListRef(const Expr *E) {
+Address CodeGenFunction::EmitVAListRef(const Expr* E) {
if (getContext().getBuiltinVaListType()->isArrayType())
return EmitPointerWithAlignment(E);
return EmitLValue(E).getAddress();
@@ -2651,11 +2630,9 @@ CodeGenFunction::protectFromPeepholes(RValue rvalue) {
// is trunc(zext) folding, but if we add more, we can easily
// extend this protection.
- if (!rvalue.isScalar())
- return PeepholeProtection();
+ if (!rvalue.isScalar()) return PeepholeProtection();
llvm::Value *value = rvalue.getScalarVal();
- if (!isa<llvm::ZExtInst>(value))
- return PeepholeProtection();
+ if (!isa<llvm::ZExtInst>(value)) return PeepholeProtection();
// Just make an extra bitcast.
assert(HaveInsertPoint());
@@ -2668,8 +2645,7 @@ CodeGenFunction::protectFromPeepholes(RValue rvalue) {
}
void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) {
- if (!protection.Inst)
- return;
+ if (!protection.Inst) return;
// In theory, we could try to duplicate the peepholes now, but whatever.
protection.Inst->eraseFromParent();
@@ -2776,7 +2752,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D,
return Address(V, Addr.getElementType(), Addr.getAlignment());
}
-CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() {}
+CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() { }
CodeGenFunction::SanitizerScope::SanitizerScope(CodeGenFunction *CGF)
: CGF(CGF) {
@@ -2859,13 +2835,12 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
// referenced by an accelerator executable function, we emit an error.
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
if (BuiltinID) {
- StringRef FeatureList(
- CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
- if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
- CallerFeatureMap) &&
- !IsHipStdPar) {
+ StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
+ if (!Builtin::evaluateRequiredTargetFeatures(
+ FeatureList, CallerFeatureMap) && !IsHipStdPar) {
CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
- << TargetDecl->getDeclName() << FeatureList;
+ << TargetDecl->getDeclName()
+ << FeatureList;
}
} else if (!TargetDecl->isMultiVersion() &&
TargetDecl->hasAttr<TargetAttr>()) {
>From e5a214d2e990add5c182438149a013295b6bd123 Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Wed, 29 Oct 2025 20:44:43 +0700
Subject: [PATCH 5/8] [Clang][CodeGen] Format
---
clang/lib/CodeGen/CodeGenFunction.cpp | 86 ++++++++++++++++-----------
1 file changed, 51 insertions(+), 35 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 88628530cf66b..58d7810e26cf5 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1277,44 +1277,60 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
MD && !MD->isStatic()) {
bool IsInLambda =
MD->getParent()->isLambda() && MD->getOverloadedOperator() == OO_Call;
- if (MD->isImplicitObjectMemberFunction())
- CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
- if (IsInLambda) {
- // We're in a lambda; figure out the captures.
- MD->getParent()->getCaptureFields(LambdaCaptureFields,
- LambdaThisCaptureField);
- if (LambdaThisCaptureField) {
- // If the lambda captures the object referred to by '*this' - either by
- // value or by reference, make sure CXXThisValue points to the correct
- // object.
-
- // Get the lvalue for the field (which is a copy of the enclosing object
- // or contains the address of the enclosing object).
- LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField);
- if (!LambdaThisCaptureField->getType()->isPointerType()) {
- // If the enclosing object was captured by value, just use its
- // address. Sign this pointer.
- CXXThisValue = ThisFieldLValue.getPointer(*this);
- } else {
- // Load the lvalue pointed to by the field, since '*this' was captured
- // by reference.
- CXXThisValue =
- EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal();
+
+ const FunctionDecl *FD = dyn_cast_if_present<FunctionDecl>(D);
+ bool IsNaked = FD && FD->hasAttr<NakedAttr>();
+ if (!IsNaked) {
+ if (MD->isImplicitObjectMemberFunction())
+ CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
+
+ if (IsInLambda) {
+ // We're in a lambda; figure out the captures.
+ MD->getParent()->getCaptureFields(LambdaCaptureFields,
+ LambdaThisCaptureField);
+ if (LambdaThisCaptureField) {
+ // If the lambda captures the object referred to by '*this' - either
+ // by value or by reference, make sure CXXThisValue points to the
+ // correct object.
+
+ // Get the lvalue for the field (which is a copy of the enclosing
+ // object or contains the address of the enclosing object).
+ LValue ThisFieldLValue =
+ EmitLValueForLambdaField(LambdaThisCaptureField);
+ if (!LambdaThisCaptureField->getType()->isPointerType()) {
+ // If the enclosing object was captured by value, just use its
+ // address. Sign this pointer.
+ CXXThisValue = ThisFieldLValue.getPointer(*this);
+ } else {
+ // Load the lvalue pointed to by the field, since '*this' was
+ // captured by reference.
+ CXXThisValue = EmitLoadOfLValue(ThisFieldLValue, SourceLocation())
+ .getScalarVal();
+ }
}
- }
- for (auto *FD : MD->getParent()->fields()) {
- if (FD->hasCapturedVLAType()) {
- auto *ExprArg = EmitLoadOfLValue(EmitLValueForLambdaField(FD),
- SourceLocation()).getScalarVal();
- auto VAT = FD->getCapturedVLAType();
- VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+
+ for (auto *FD : MD->getParent()->fields()) {
+ if (FD->hasCapturedVLAType()) {
+ auto *ExprArg =
+ EmitLoadOfLValue(EmitLValueForLambdaField(FD), SourceLocation())
+ .getScalarVal();
+ auto VAT = FD->getCapturedVLAType();
+ VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+ }
}
+ } else if (MD->isImplicitObjectMemberFunction()) {
+ // Not in a lambda; just use 'this' from the method.
+ // FIXME: Should we generate a new load for each use of 'this'? The
+ // fast register allocator would be happier...
+ CXXThisValue = CXXABIThisValue;
}
- } else if (MD->isImplicitObjectMemberFunction()) {
- // Not in a lambda; just use 'this' from the method.
- // FIXME: Should we generate a new load for each use of 'this'? The
- // fast register allocator would be happier...
- CXXThisValue = CXXABIThisValue;
+ } else if (IsInLambda && MD->isImplicitObjectMemberFunction()) {
+ // Populate the capture-field metadata for analysis. We skip
+ // EmitInstanceFunctionProlog to avoid emitting prologue code.
+ // FIXME: Naked functions cannot access captures via LLVM IR; any access
+ // must be done manually in inline assembly.
+ MD->getParent()->getCaptureFields(LambdaCaptureFields,
+ LambdaThisCaptureField);
}
// Check the 'this' pointer once per function, if it's available.
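As a minimal illustration of the branch above (a hypothetical sketch, not part of the patch; the attribute placement and inline asm mirror the tests added later in this series): when the naked attribute survives, no instance or lambda prologue is emitted, so the body must consist entirely of inline assembly and any capture access has to happen inside the asm itself.

// Hypothetical example (x86-64 assumed): the call operator is naked, so
// StartFunction records LambdaCaptureFields/LambdaThisCaptureField for
// analysis but emits no prologue code.
void naked_lambda_sketch() {
  int x = 0;
  auto trap = [x]() __attribute__((naked)) {
    // 'x' is captured but never ODR-used, so the attribute is kept; the
    // capture field is recorded, but no prologue materializes it.
    asm volatile("ud2"); // trap; no prologue, no epilogue, no return
  };
  trap();
}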
>From 79d94227c41fc5e5f054c0ebc1bf8e3ddf6336a4 Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Thu, 30 Oct 2025 17:16:10 +0700
Subject: [PATCH 6/8] [Clang][Sema] Remove naked attribute from lambdas with
ODR-used captures
This commit is for GCC compatibility: GCC silently ignores the naked attribute when any of the lambda's captures is ODR-used in the body.
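For illustration, a hedged sketch of the resulting behavior (the function and variable names below are hypothetical; the attribute syntax mirrors the new tests): a lambda that ODR-uses a capture silently loses the attribute and gets a normal prologue, while one that never uses its captures keeps it.

// Hypothetical sketch of the GCC-compatible behavior (x86-64 assumed).
void naked_attr_compat() {
  int x = 42;
  auto used = [x]() __attribute__((naked)) {
    // 'x' is ODR-used: Sema drops the naked attribute, so CodeGen emits the
    // usual closure prologue (alloca/store of the captured 'x').
    asm volatile("movl %0, %%eax\n\tretq" : : "r"(x));
  };
  auto unused = [x]() __attribute__((naked)) {
    // No capture is ODR-used: the attribute is kept and no prologue is emitted.
    asm volatile("retq");
  };
  used();
  unused();
}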
---
clang/compile_commands.json | 1 +
clang/lib/Sema/SemaLambda.cpp | 14 ++++++++++++++
.../naked-lambda-odr-used-captures.cpp | 17 +++++++++++++++++
clang/test/SemaCXX/naked-lambda-odr.cpp | 18 ++++++++++++++++++
4 files changed, 50 insertions(+)
create mode 120000 clang/compile_commands.json
create mode 100644 clang/test/CodeGenCXX/naked-lambda-odr-used-captures.cpp
create mode 100644 clang/test/SemaCXX/naked-lambda-odr.cpp
diff --git a/clang/compile_commands.json b/clang/compile_commands.json
new file mode 120000
index 0000000000000..d54ffefc90207
--- /dev/null
+++ b/clang/compile_commands.json
@@ -0,0 +1 @@
+../build/compile_commands.json
\ No newline at end of file
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index fbc2e7eb30676..86095ed3ddc29 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -2332,6 +2332,20 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc,
maybeAddDeclWithEffects(LSI->CallOperator);
}
+ // This is for GCC compatibility: GCC silently drops the naked attribute
+ // when any lambda capture is ODR-used in the function body, since a naked
+ // function has no prologue with which to set up the closure.
+ if (CallOperator->hasAttr<NakedAttr>() && !Captures.empty()) {
+ // Examine the capture list that was already analyzed during semantic
+ // analysis; if any capture is ODR-used, drop the attribute.
+ for (const Capture &Cap : LSI->Captures) {
+ if (Cap.isODRUsed()) {
+ CallOperator->dropAttr<NakedAttr>();
+ break;
+ }
+ }
+ }
+
return MaybeBindToTemporary(Lambda);
}
diff --git a/clang/test/CodeGenCXX/naked-lambda-odr-used-captures.cpp b/clang/test/CodeGenCXX/naked-lambda-odr-used-captures.cpp
new file mode 100644
index 0000000000000..032d0b7853cd1
--- /dev/null
+++ b/clang/test/CodeGenCXX/naked-lambda-odr-used-captures.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+// Test that the naked attribute is dropped when captures are ODR-used (GCC compat).
+void test_odr_used_captures() {
+ int x = 42;
+ int y = 6;
+ auto l = [x, &y]() __attribute__((naked)) {
+ asm volatile("movl %0, %%eax\n\tmovl %1, %%ebx\n\tretq" : : "r"(x), "r"(y));
+ };
+ l();
+}
+
+// CHECK-LABEL: define internal void @"_ZZ22test_odr_used_capturesvENK3$_0clEv"
+// CHECK-NOT: naked
+// CHECK: alloca
+// CHECK: store
+
diff --git a/clang/test/SemaCXX/naked-lambda-odr.cpp b/clang/test/SemaCXX/naked-lambda-odr.cpp
new file mode 100644
index 0000000000000..7d019f9d99dd1
--- /dev/null
+++ b/clang/test/SemaCXX/naked-lambda-odr.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-pc-linux-gnu
+// expected-no-diagnostics
+void uses_capture() {
+ int x = 42;
+ int y = 6;
+ auto l = [x, &y]() __attribute__((naked)) {
+ asm volatile("movl %0, %%eax\n\tmovl %1, %%ebx\n\tretq" : : "r"(x), "r"(y));
+ };
+ l();
+}
+
+void unused_captures() {
+ int x = 42;
+ auto l = [x]() __attribute__((naked)) {
+ asm volatile("retq");
+ };
+ l();
+}
>From ba6ece5cf0bd378449c19379067547c5586f187a Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Thu, 30 Oct 2025 17:21:27 +0700
Subject: [PATCH 7/8] Remove compilation database
---
clang/compile_commands.json | 1 -
1 file changed, 1 deletion(-)
delete mode 120000 clang/compile_commands.json
diff --git a/clang/compile_commands.json b/clang/compile_commands.json
deleted file mode 120000
index d54ffefc90207..0000000000000
--- a/clang/compile_commands.json
+++ /dev/null
@@ -1 +0,0 @@
-../build/compile_commands.json
\ No newline at end of file
>From d57f8043be86b6c3212c6b08285ff738ce92f753 Mon Sep 17 00:00:00 2001
From: typeal <type.alplusplus at gmail.com>
Date: Thu, 30 Oct 2025 17:27:59 +0700
Subject: [PATCH 8/8] Remove FIXME in the special path
---
clang/lib/CodeGen/CodeGenFunction.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 58d7810e26cf5..5fc812e7eaef0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1327,8 +1327,6 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
} else if (IsInLambda && MD->isImplicitObjectMemberFunction()) {
// Populate the capture-field metadata for analysis. We skip
// EmitInstanceFunctionProlog to avoid emitting prologue code.
- // FIXME: Naked functions cannot access captures via LLVM IR; any access
- // must be done manually in inline assembly.
MD->getParent()->getCaptureFields(LambdaCaptureFields,
LambdaThisCaptureField);
}