[llvm-branch-commits] [clang] [llvm] [Clang][AIX] Add -mloadtime-comment-vars flag to preserve identifying variables (PR #187986)
Tony Varghese via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Mar 23 00:29:55 PDT 2026
https://github.com/tonykuttai created https://github.com/llvm/llvm-project/pull/187986
None
>From 9fa76d028cc835c1aa1ab55544a6e42d8523e8b3 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Wed, 18 Mar 2026 10:48:36 -0400
Subject: [PATCH] [Clang][AIX] Add -mloadtime-comment-vars flag to preserve
identifying variables
---
clang/include/clang/Basic/CodeGenOptions.h | 2 +
clang/include/clang/Options/Options.td | 7 +
clang/lib/CodeGen/CodeGenModule.cpp | 73 +++++++++++
clang/lib/CodeGen/CodeGenModule.h | 6 +
clang/lib/Driver/ToolChains/Clang.cpp | 5 +
clang/test/CodeGen/loadtime-comment-vars.c | 28 ++++
.../Utils/LowerCommentStringPass.cpp | 120 ++++++++++--------
.../loadtime-comment-vars.ll | 26 ++++
8 files changed, 212 insertions(+), 55 deletions(-)
create mode 100644 clang/test/CodeGen/loadtime-comment-vars.c
create mode 100644 llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 9454f7672b7e1..062a7a4dff73e 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -323,6 +323,8 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// A list of linker options to embed in the object file.
std::vector<std::string> LinkerOptions;
+ std::vector<std::string> LoadTimeCommentVars;
+
/// Name of the profile file to use as output for -fprofile-instr-generate,
/// -fprofile-generate, and -fcs-profile-generate.
std::string InstrProfileOutput;
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 8b0c701521728..92d86bc3d06f1 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -4698,6 +4698,13 @@ def fvisibility_global_new_delete_EQ : Joined<["-"], "fvisibility-global-new-del
Visibility<[ClangOption, CC1Option]>,
HelpText<"The visibility for global C++ operator new and delete declarations. If 'source' is specified the visibility is not adjusted">,
MarshallingInfoVisibilityGlobalNewDelete<LangOpts<"GlobalAllocationFunctionVisibility">, "ForceDefault">;
+def mloadtime_comment_vars_EQ
+ : CommaJoined<["-"], "mloadtime-comment-vars=">,
+ Group<m_Group>,
+ Visibility<[ClangOption, CC1Option]>,
+ HelpText<"Comma-separated list of global variable names to treat as "
+ "loadtime variables">,
+ MarshallingInfoStringVector<CodeGenOpts<"LoadTimeCommentVars">>;
def mdefault_visibility_export_mapping_EQ : Joined<["-"], "mdefault-visibility-export-mapping=">,
Values<"none,explicit,all">,
NormalizedValuesScope<"LangOptions::DefaultVisiblityExportMapping">,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index eaa64b10e2368..a2432a80e71a9 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -69,6 +69,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Hash.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/RISCVISAInfo.h"
#include "llvm/TargetParser/Triple.h"
@@ -1633,6 +1634,9 @@ void CodeGenModule::Release() {
EmitBackendOptionsMetadata(getCodeGenOpts());
EmitLoadTimeComment();
+
+ // Handle CLI load-time string variables
+ EmitLoadTimeCommentVars();
// If there is device offloading code embed it in the host now.
EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags());
@@ -4106,6 +4110,75 @@ void CodeGenModule::EmitLoadTimeComment() {
}
}
+/// Decide whether \p D is eligible to be preserved as a load-time comment
+/// variable.
+///
+/// \returns true when \p D is non-null, has an initializer, and its type is
+/// either a pointer to a character type or an array of a character type;
+/// false for everything else (ints, structs, ...).
+bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const {
+  // Without a declaration or an initializer there is no string to keep.
+  if (!D || !D->hasInit())
+    return false;
+
+  const QualType VarTy = D->getType();
+
+  // Pointer form, e.g. `char *sccsid` or `const char *copyright`.
+  const auto *PtrTy = VarTy->getAs<PointerType>();
+  if (PtrTy && PtrTy->getPointeeType()->isAnyCharacterType())
+    return true;
+
+  // Array form, e.g. `char version[]`. ASTContext::getAsArrayType is used to
+  // unwrap the array type safely.
+  const ArrayType *ArrTy = getContext().getAsArrayType(VarTy);
+  return ArrTy && ArrTy->getElementType()->isAnyCharacterType();
+}
+
+/// Tag and retain the global variables named by -mloadtime-comment-vars=.
+///
+/// Does nothing unless the target is AIX and the option list is non-empty.
+/// For every top-level variable whose name appears in
+/// CodeGenOptions::LoadTimeCommentVars and which passes
+/// isValidLoadTimeCommentVariable(), the definition is emitted if it has not
+/// been already, "copyright.variable" metadata holding the variable name is
+/// attached to the global, and the global is added to the used list.
+void CodeGenModule::EmitLoadTimeCommentVars() {
+  if (!getTriple().isOSAIX())
+    return;
+
+  const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars;
+  if (LoadTimeCommentVars.empty())
+    return;
+
+  // Resolve the IR global currently backing a variable; yields null when the
+  // address is not an llvm::GlobalVariable once pointer casts are stripped.
+  auto LookupGlobal = [this](const VarDecl *VD) {
+    return dyn_cast<llvm::GlobalVariable>(
+        GetAddrOfGlobalVar(VD)->stripPointerCasts());
+  };
+
+  // Walk every top-level declaration of the translation unit.
+  for (auto *D : getContext().getTranslationUnitDecl()->decls()) {
+    auto *VD = dyn_cast<VarDecl>(D);
+    if (!VD)
+      continue;
+
+    // Only variables requested on the command line are considered.
+    if (!llvm::is_contained(LoadTimeCommentVars, VD->getName()))
+      continue;
+
+    if (!isValidLoadTimeCommentVariable(VD))
+      continue;
+
+    // Get or create the global in the IR.
+    llvm::GlobalVariable *GV = LookupGlobal(VD);
+    if (!GV)
+      continue;
+
+    if (GV->isDeclaration()) {
+      // Force Clang to emit the definition if it skipped it.
+      EmitGlobalDefinition(VD);
+
+      // Emitting the definition may replace the llvm::GlobalVariable (for
+      // example when the initializer's IR type differs from the declared
+      // type), so look it up again instead of reusing a possibly stale
+      // pointer.
+      GV = LookupGlobal(VD);
+      if (!GV || GV->isDeclaration())
+        continue;
+    }
+
+    // Tag it for the backend.
+    auto &C = getLLVMContext();
+    llvm::Metadata *Ops[] = {llvm::MDString::get(C, VD->getName())};
+    GV->setMetadata("copyright.variable", llvm::MDNode::get(C, Ops));
+
+    // Prevent linker/optimization GC.
+    addUsedGlobal(GV);
+  }
+}
+
bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
// In OpenMP 5.0 variables and function may be marked as
// device_type(host/nohost) and we should not emit them eagerly unless we sure
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index d859943ebfb78..60310da9529df 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -2086,6 +2086,12 @@ class CodeGenModule : public CodeGenTypeCache {
/// be processed by the backend to include it in the generated executable.
void EmitLoadTimeComment();
+ /// Returns true if \p D has an initializer and is a pointer to, or an array
+ /// of, a character type, i.e. it can hold a load-time comment string.
+ bool isValidLoadTimeCommentVariable(const VarDecl *D) const;
+
+ void EmitLoadTimeCommentVars();
+
/// Determine whether the definition can be emitted eagerly, or should be
/// delayed until the end of the translation unit. This is relevant for
/// definitions whose linkage can change, e.g. implicit function instantions
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 6416baf9126ff..0d6e495989adf 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6080,6 +6080,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
<< A->getOption().getName() << TripleStr;
}
+ // Forward loadtime-comment vars option to cc1
+ if (Arg *A = Args.getLastArg(options::OPT_mloadtime_comment_vars_EQ)) {
+ A->render(Args, CmdArgs);
+ }
+
// Prepare `-aux-target-cpu` and `-aux-target-feature` unless
// `--gpu-use-aux-triple-only` is specified.
if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c
new file mode 100644
index 0000000000000..ef32ba494ed80
--- /dev/null
+++ b/clang/test/CodeGen/loadtime-comment-vars.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s
+
+// String pointer (Should be emitted and tagged)
+static char *sccsid = "@(#) Object sid Version 1.0";
+
+// String array (Should be emitted and tagged)
+static char version[] = "Object scc Version 2.0";
+
+// Const string (Not in CLI list, should NOT be emitted)
+static const char *copyright = "Copyright 2026";
+
+// Integer (In CLI list but invalid type, should NOT be emitted)
+static int build_number = 12345;
+
+void foo() {}
+
+// CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}, !copyright.variable ![[MD_SCC:[0-9]+]]
+// CHECK-NEXT: @.str = private unnamed_addr constant [28 x i8] c"@(#) Object sid Version 1.0\00", align 1
+// CHECK: @version = internal global [23 x i8] c"Object scc Version 2.0\00", align {{[0-9]+}}, !copyright.variable ![[MD_VER:[0-9]+]]
+
+// Ensure the unrequested/invalid variables are optimized away
+// CHECK-NOT: @copyright
+// CHECK-NOT: @build_number
+
+// Ensure the metadata tags contain the correct strings
+// CHECK: ![[MD_SCC]] = !{!"sccsid"}
+// CHECK: ![[MD_VER]] = !{!"version"}
diff --git a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
index 6deef2f75e0a3..3a20694a6728a 100644
--- a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
+++ b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
@@ -81,68 +81,78 @@ PreservedAnalyses LowerCommentStringPass::run(Module &M,
LLVMContext &Ctx = M.getContext();
+ // Collect all globals that need implicit refs, both string and variables
+ SmallVector<GlobalValue *, 4> CopyrightGlobals;
+
+ // 1. Process pragma comment copyright (string literal) Once per TU
// Single-metadata: !comment_string.loadtime = !{!0}
// Each operand node is expected to have one MDString operand.
NamedMDNode *MD = M.getNamedMetadata("comment_string.loadtime");
- if (!MD || MD->getNumOperands() == 0)
- return PreservedAnalyses::all();
-
- // At this point we are guarateed that one TU contains a single copyright
- // metadata entry. Create TU-local string global for that metadata entry.
- MDNode *MdNode = MD->getOperand(0);
- if (!MdNode || MdNode->getNumOperands() == 0)
- return PreservedAnalyses::all();
-
- auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0));
- if (!MdString)
- return PreservedAnalyses::all();
-
- StringRef Text = MdString->getString();
- if (Text.empty())
- return PreservedAnalyses::all();
-
- // 1. Create a single NULL-terminated string global
- Constant *StrInit = ConstantDataArray::getString(Ctx, Text, /*AddNull=*/true);
-
- // Internal, constant, TU-local--avoids duplicate symbol issues across TUs.
- auto *StrGV = new GlobalVariable(M, StrInit->getType(),
- /*isConstant=*/true,
- GlobalValue::InternalLinkage, StrInit,
- /*Name=*/"__loadtime_comment_str");
- // Set unnamed_addr to allow the linker to merge identical strings
- StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- StrGV->setAlignment(Align(1));
- // Place in the "__loadtime_comment" section.
- // The GV is constant, so we expect a read-only section.
- StrGV->setSection("__loadtime_comment");
-
- // 2. Add the string to llvm.used to prevent LLVM optimization/LTO passes from
- // removing it.
- appendToUsed(M, {StrGV});
-
- // 3. Attach !implicit ref to every defined function
- // Create a metadata node pointing to the copyright string:
- // !N = !{ptr @__loadtime_comment_str}
- Metadata *Ops[] = {ConstantAsMetadata::get(StrGV)};
- MDNode *ImplicitRefMD = MDNode::get(Ctx, Ops);
-
- // Lambda to attach implicit.ref metadata to a function.
- auto AddImplicitRef = [&](Function &F) {
+ if (MD && MD->getNumOperands() > 0) {
+ MDNode *MdNode = MD->getOperand(0);
+ if (MdNode && MdNode->getNumOperands() > 0) {
+ auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0));
+ if (MdString && !MdString->getString().empty()) {
+ StringRef Text = MdString->getString();
+
+ // Create the string global
+ Constant *StrInit =
+ ConstantDataArray::getString(Ctx, Text, /*AddNull*/ true);
+ auto *StrGV = new GlobalVariable(M, StrInit->getType(),
+ /*isConstant*/ true,
+ GlobalValue::InternalLinkage, StrInit,
+ "__loadtime_comment_str");
+ StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ StrGV->setAlignment(Align(1));
+ StrGV->setSection("__loadtime_comment");
+
+ // Add the string to llvm.used to prevent LLVM optimization/LTO passes
+ // from removing it
+ appendToUsed(M, {StrGV});
+
+ // Add to list of globals needing implicti refs
+ CopyrightGlobals.push_back(StrGV);
+ }
+ }
+ MD->eraseFromParent();
+ }
+
+ // 2. Process copyright variables - multiple allowed per TU
+ for (GlobalVariable &GV : M.globals()) {
+ if (GV.getMetadata("copyright.variable")) {
+ // Add to list of globals needing implicit refs
+ CopyrightGlobals.push_back(&GV);
+ }
+ }
+
+ // Lambda to attach implicit ref metadata to a function
+ auto AddImplicitRef = [&](Function &F, GlobalValue *GV) {
if (F.isDeclaration())
return;
- // Attach the implicit.ref metadata to the function
- F.setMetadata("implicit.ref", ImplicitRefMD);
- LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: "
- << F.getName() << "\n");
- };
- // Process all functions in the module
- for (Function &F : M)
- AddImplicitRef(F);
+ // Create a new MDNode with exactly ONE operand (the global variable)
+ Metadata *Ops[] = {ConstantAsMetadata::get(GV)};
+ MDNode *NewMD = MDNode::get(Ctx, Ops);
+
+ // addMetadata allows multiple nodes of the same kind to be attached to a
+ // function. This correctly creates a list of single-operand MDNodes.
+ F.addMetadata(LLVMContext::MD_implicit_ref, *NewMD);
- // Cleanup the processed metadata.
- MD->eraseFromParent();
- LLVM_DEBUG(dbgs() << "[copyright] created string and anchor for module\n");
+ LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: "
+ << F.getName() << " for global: " << GV->getName()
+ << "\n");
+ };
+ // 3. Attach implicit ref to all functions for each copyright global
+ if (!CopyrightGlobals.empty()) {
+ // Apply to all functions for all copyright globals
+ for (GlobalValue *GV : CopyrightGlobals) {
+ for (Function &F : M)
+ AddImplicitRef(F, GV);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "[copyright] processed " << CopyrightGlobals.size()
+ << " copyright globals\n");
return PreservedAnalyses::all();
}
diff --git a/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll
new file mode 100644
index 0000000000000..ff7c291d3aafc
--- /dev/null
+++ b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll
@@ -0,0 +1,26 @@
+; RUN: opt -passes=lower-comment-string -S < %s | FileCheck %s
+
+target triple = "powerpc64-ibm-aix"
+
+@sccsid = internal global ptr @.str, align 8, !copyright.variable !0
+@.str = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align 1
+@version = internal global [22 x i8] c"Copyright Version 2.0\00", align 1, !copyright.variable !1
+
+; CHECK: define void @foo() !implicit.ref ![[REF1:[0-9]+]] !implicit.ref ![[REF2:[0-9]+]] {
+define void @foo() {
+entry:
+ ret void
+}
+
+; CHECK: define void @bar() !implicit.ref ![[REF1]] !implicit.ref ![[REF2]] {
+define void @bar() {
+entry:
+ ret void
+}
+
+!0 = !{!"sccsid"}
+!1 = !{!"version"}
+
+; Verify that the generated implicit.ref metadata nodes point to the correct global variables.
+; CHECK: ![[REF1]] = !{ptr @sccsid}
+; CHECK: ![[REF2]] = !{ptr @version}
\ No newline at end of file
More information about the llvm-branch-commits
mailing list