[llvm-branch-commits] [clang] [llvm] [Clang][AIX] Add -mloadtime-comment-vars flag to preserve identifying variables (PR #187986)

Tony Varghese via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Mar 23 00:29:55 PDT 2026


https://github.com/tonykuttai created https://github.com/llvm/llvm-project/pull/187986

None

>From 9fa76d028cc835c1aa1ab55544a6e42d8523e8b3 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Wed, 18 Mar 2026 10:48:36 -0400
Subject: [PATCH] [Clang][AIX] Add -mloadtime-comment-vars flag to preserve
 identifying variables

---
 clang/include/clang/Basic/CodeGenOptions.h    |   2 +
 clang/include/clang/Options/Options.td        |   7 +
 clang/lib/CodeGen/CodeGenModule.cpp           |  73 +++++++++++
 clang/lib/CodeGen/CodeGenModule.h             |   6 +
 clang/lib/Driver/ToolChains/Clang.cpp         |   5 +
 clang/test/CodeGen/loadtime-comment-vars.c    |  28 ++++
 .../Utils/LowerCommentStringPass.cpp          | 120 ++++++++++--------
 .../loadtime-comment-vars.ll                  |  26 ++++
 8 files changed, 212 insertions(+), 55 deletions(-)
 create mode 100644 clang/test/CodeGen/loadtime-comment-vars.c
 create mode 100644 llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll

diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 9454f7672b7e1..062a7a4dff73e 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -323,6 +323,8 @@ class CodeGenOptions : public CodeGenOptionsBase {
   /// A list of linker options to embed in the object file.
   std::vector<std::string> LinkerOptions;
 
+  std::vector<std::string> LoadTimeCommentVars;
+
   /// Name of the profile file to use as output for -fprofile-instr-generate,
   /// -fprofile-generate, and -fcs-profile-generate.
   std::string InstrProfileOutput;
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 8b0c701521728..92d86bc3d06f1 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -4698,6 +4698,13 @@ def fvisibility_global_new_delete_EQ : Joined<["-"], "fvisibility-global-new-del
   Visibility<[ClangOption, CC1Option]>,
   HelpText<"The visibility for global C++ operator new and delete declarations. If 'source' is specified the visibility is not adjusted">,
   MarshallingInfoVisibilityGlobalNewDelete<LangOpts<"GlobalAllocationFunctionVisibility">, "ForceDefault">;
+def mloadtime_comment_vars_EQ
+    : CommaJoined<["-"], "mloadtime-comment-vars=">,
+      Group<m_Group>,
+      Visibility<[ClangOption, CC1Option]>,
+      HelpText<"Comma-separated list of global variable names to treat as "
+               "loadtime variables">,
+      MarshallingInfoStringVector<CodeGenOpts<"LoadTimeCommentVars">>;
 def mdefault_visibility_export_mapping_EQ : Joined<["-"], "mdefault-visibility-export-mapping=">,
   Values<"none,explicit,all">,
   NormalizedValuesScope<"LangOptions::DefaultVisiblityExportMapping">,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index eaa64b10e2368..a2432a80e71a9 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -69,6 +69,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Hash.h"
 #include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/AArch64TargetParser.h"
 #include "llvm/TargetParser/RISCVISAInfo.h"
 #include "llvm/TargetParser/Triple.h"
@@ -1633,6 +1634,9 @@ void CodeGenModule::Release() {
   EmitBackendOptionsMetadata(getCodeGenOpts());
 
   EmitLoadTimeComment();
+  
+  // Handle CLI load-time string variables
+  EmitLoadTimeCommentVars();
 
   // If there is device offloading code embed it in the host now.
   EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags());
@@ -4106,6 +4110,75 @@ void CodeGenModule::EmitLoadTimeComment() {
   }
 }
 
+bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const {
+  // Reject null declarations and variables without an initializer.
+  if (!D || !D->hasInit())
+    return false;
+
+  QualType Ty = D->getType();
+
+  // 1. Pointer whose pointee is a character type (e.g., char *sccsid).
+  if (const PointerType *PT = Ty->getAs<PointerType>()) {
+    if (PT->getPointeeType()->isAnyCharacterType())
+      return true;
+  }
+
+  // 2. Array whose element is a character type (e.g., char version[]).
+  // ASTContext::getAsArrayType yields null when Ty is not an array type.
+  if (const ArrayType *AT = getContext().getAsArrayType(Ty)) {
+    if (AT->getElementType()->isAnyCharacterType())
+      return true;
+  }
+
+  return false; // Any other type (e.g., int, struct) is rejected.
+}
+
+void CodeGenModule::EmitLoadTimeCommentVars() {
+  // The -mloadtime-comment-vars list is only honored when targeting AIX.
+  if (!getTriple().isOSAIX())
+    return;
+
+  const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars;
+  if (LoadTimeCommentVars.empty())
+    return;
+
+  TranslationUnitDecl *TU = getContext().getTranslationUnitDecl();
+  // Only declarations directly inside the translation unit are visited.
+  for (auto *D : TU->decls()) {
+    if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
+
+      // Skip variables whose name was not listed on the command line.
+      if (!llvm::is_contained(LoadTimeCommentVars, VD->getName()))
+        continue;
+
+      if (!isValidLoadTimeCommentVariable(VD))
+        continue;
+
+      // Get or create the GlobalValue in the IR
+      llvm::Constant *Addr = GetAddrOfGlobalVar(VD);
+
+      // Look through any pointer casts to reach the global variable itself.
+      if (auto *GV =
+              dyn_cast<llvm::GlobalVariable>(Addr->stripPointerCasts())) {
+
+        // Only a declaration so far: ask for the definition to be emitted.
+        if (GV->isDeclaration())
+          EmitGlobalDefinition(VD);
+
+        if (!GV->isDeclaration()) {
+          // Tag the definition with !copyright.variable !{!"<variable name>"}.
+          auto &C = getLLVMContext();
+          llvm::Metadata *Ops[] = {llvm::MDString::get(C, VD->getName())};
+          GV->setMetadata("copyright.variable", llvm::MDNode::get(C, Ops));
+
+          // Prevent Linker/Optimization GC
+          addUsedGlobal(GV);
+        }
+      }
+    }
+  }
+}
+
 bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
   // In OpenMP 5.0 variables and function may be marked as
   // device_type(host/nohost) and we should not emit them eagerly unless we sure
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index d859943ebfb78..60310da9529df 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -2086,6 +2086,12 @@ class CodeGenModule : public CodeGenTypeCache {
   /// be processed by the backend to include it in the generated executable.
   void EmitLoadTimeComment();
 
+  /// Return true if \p D has an initializer and is a character pointer or
+  /// character array, i.e. a type usable as a load-time comment variable.
+  bool isValidLoadTimeCommentVariable(const VarDecl *D) const;
+
+  void EmitLoadTimeCommentVars();
+
   /// Determine whether the definition can be emitted eagerly, or should be
   /// delayed until the end of the translation unit. This is relevant for
   /// definitions whose linkage can change, e.g. implicit function instantions
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 6416baf9126ff..0d6e495989adf 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6080,6 +6080,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
           << A->getOption().getName() << TripleStr;
   }
 
+  // Forward loadtime-comment vars option to cc1
+  if (Arg *A = Args.getLastArg(options::OPT_mloadtime_comment_vars_EQ)) {
+    A->render(Args, CmdArgs);
+  }
+
   // Prepare `-aux-target-cpu` and `-aux-target-feature` unless
   // `--gpu-use-aux-triple-only` is specified.
   if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c
new file mode 100644
index 0000000000000..ef32ba494ed80
--- /dev/null
+++ b/clang/test/CodeGen/loadtime-comment-vars.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s
+
+// String pointer (Should be emitted and tagged)
+static char *sccsid = "@(#) Object sid Version 1.0";
+
+// String array (Should be emitted and tagged)
+static char version[] = "Object scc Version 2.0";
+
+// Const string (Not in CLI list, should NOT be emitted)
+static const char *copyright = "Copyright 2026";
+
+// Integer (In CLI list but invalid type, should NOT be emitted)
+static int build_number = 12345;
+
+void foo() {}
+
+// CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}, !copyright.variable ![[MD_SCC:[0-9]+]]
+// CHECK-NEXT: @.str = private unnamed_addr constant [28 x i8] c"@(#) Object sid Version 1.0\00", align 1
+// CHECK: @version = internal global [23 x i8] c"Object scc Version 2.0\00", align {{[0-9]+}}, !copyright.variable ![[MD_VER:[0-9]+]]
+
+// Ensure the unrequested/invalid variables are optimized away
+// CHECK-NOT: @copyright
+// CHECK-NOT: @build_number
+
+// Ensure the metadata tags contain the correct strings
+// CHECK: ![[MD_SCC]] = !{!"sccsid"}
+// CHECK: ![[MD_VER]] = !{!"version"}
diff --git a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
index 6deef2f75e0a3..3a20694a6728a 100644
--- a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
+++ b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
@@ -81,68 +81,78 @@ PreservedAnalyses LowerCommentStringPass::run(Module &M,
 
   LLVMContext &Ctx = M.getContext();
 
+  // Collect all globals that need implicit refs, both string and variables
+  SmallVector<GlobalValue *, 4> CopyrightGlobals;
+
+  // 1. Process the pragma comment copyright string literal (once per TU).
   // Single-metadata: !comment_string.loadtime = !{!0}
   // Each operand node is expected to have one MDString operand.
   NamedMDNode *MD = M.getNamedMetadata("comment_string.loadtime");
-  if (!MD || MD->getNumOperands() == 0)
-    return PreservedAnalyses::all();
-
-  // At this point we are guarateed that one TU contains a single copyright
-  // metadata entry. Create TU-local string global for that metadata entry.
-  MDNode *MdNode = MD->getOperand(0);
-  if (!MdNode || MdNode->getNumOperands() == 0)
-    return PreservedAnalyses::all();
-
-  auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0));
-  if (!MdString)
-    return PreservedAnalyses::all();
-
-  StringRef Text = MdString->getString();
-  if (Text.empty())
-    return PreservedAnalyses::all();
-
-  // 1. Create a single NULL-terminated string global
-  Constant *StrInit = ConstantDataArray::getString(Ctx, Text, /*AddNull=*/true);
-
-  // Internal, constant, TU-local--avoids duplicate symbol issues across TUs.
-  auto *StrGV = new GlobalVariable(M, StrInit->getType(),
-                                   /*isConstant=*/true,
-                                   GlobalValue::InternalLinkage, StrInit,
-                                   /*Name=*/"__loadtime_comment_str");
-  // Set unnamed_addr to allow the linker to merge identical strings
-  StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-  StrGV->setAlignment(Align(1));
-  // Place in the "__loadtime_comment" section.
-  // The GV is constant, so we expect a read-only section.
-  StrGV->setSection("__loadtime_comment");
-
-  // 2. Add the string to llvm.used to prevent LLVM optimization/LTO passes from
-  // removing it.
-  appendToUsed(M, {StrGV});
-
-  // 3. Attach !implicit ref to every defined function
-  // Create a metadata node pointing to the copyright string:
-  //   !N = !{ptr @__loadtime_comment_str}
-  Metadata *Ops[] = {ConstantAsMetadata::get(StrGV)};
-  MDNode *ImplicitRefMD = MDNode::get(Ctx, Ops);
-
-  // Lambda to attach implicit.ref metadata to a function.
-  auto AddImplicitRef = [&](Function &F) {
+  if (MD && MD->getNumOperands() > 0) {
+    MDNode *MdNode = MD->getOperand(0);
+    if (MdNode && MdNode->getNumOperands() > 0) {
+      auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0));
+      if (MdString && !MdString->getString().empty()) {
+        StringRef Text = MdString->getString();
+
+        // Create the string global
+        Constant *StrInit =
+            ConstantDataArray::getString(Ctx, Text, /*AddNull*/ true);
+        auto *StrGV = new GlobalVariable(M, StrInit->getType(),
+                                         /*isConstant*/ true,
+                                         GlobalValue::InternalLinkage, StrInit,
+                                         "__loadtime_comment_str");
+        StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+        StrGV->setAlignment(Align(1));
+        StrGV->setSection("__loadtime_comment");
+
+        // Add the string to llvm.used to prevent LLVM optimization/LTO passes
+        // from removing it
+        appendToUsed(M, {StrGV});
+
+        // Add to list of globals needing implicit refs
+        CopyrightGlobals.push_back(StrGV);
+      }
+    }
+    MD->eraseFromParent();
+  }  
+
+  // 2. Process copyright variables - multiple allowed per TU
+  for (GlobalVariable &GV : M.globals()) {
+    if (GV.getMetadata("copyright.variable")) {
+      // Add to list of globals needing implicit refs
+      CopyrightGlobals.push_back(&GV);
+    }
+  }
+
+  // Lambda to attach implicit ref metadata to a function
+  auto AddImplicitRef = [&](Function &F, GlobalValue *GV) {
     if (F.isDeclaration())
       return;
-    // Attach the implicit.ref metadata to the function
-    F.setMetadata("implicit.ref", ImplicitRefMD);
-    LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function:  "
-                      << F.getName() << "\n");
-  };
 
-  // Process all functions in the module
-  for (Function &F : M)
-    AddImplicitRef(F);
+    // Create a new MDNode with exactly ONE operand (the global variable)
+    Metadata *Ops[] = {ConstantAsMetadata::get(GV)};
+    MDNode *NewMD = MDNode::get(Ctx, Ops);
+
+    // addMetadata allows multiple nodes of the same kind to be attached to a
+    // function. This correctly creates a list of single-operand MDNodes.
+    F.addMetadata(LLVMContext::MD_implicit_ref, *NewMD);
 
-  // Cleanup the processed metadata.
-  MD->eraseFromParent();
-  LLVM_DEBUG(dbgs() << "[copyright] created string and anchor for module\n");
+    LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: "
+                      << F.getName() << " for global: " << GV->getName()
+                      << "\n");
+  };
 
+  // 3. Attach implicit ref to all functions for each copyright global
+  if (!CopyrightGlobals.empty()) {
+    // Apply to all functions for all copyright globals
+    for (GlobalValue *GV : CopyrightGlobals) {
+      for (Function &F : M)
+        AddImplicitRef(F, GV);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "[copyright] processed " << CopyrightGlobals.size()
+                    << " copyright globals\n");
   return PreservedAnalyses::all();
 }
diff --git a/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll
new file mode 100644
index 0000000000000..ff7c291d3aafc
--- /dev/null
+++ b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll
@@ -0,0 +1,26 @@
+; RUN: opt -passes=lower-comment-string -S < %s | FileCheck %s
+
+target triple = "powerpc64-ibm-aix"
+
+ at sccsid = internal global ptr @.str, align 8, !copyright.variable !0
+ at .str = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align 1
+ at version = internal global [22 x i8] c"Copyright Version 2.0\00", align 1, !copyright.variable !1
+
+; CHECK: define void @foo() !implicit.ref ![[REF1:[0-9]+]] !implicit.ref ![[REF2:[0-9]+]] {
+define void @foo() {
+entry:
+  ret void
+}
+
+; CHECK: define void @bar() !implicit.ref ![[REF1]] !implicit.ref ![[REF2]] {
+define void @bar() {
+entry:
+  ret void
+}
+
+!0 = !{!"sccsid"}
+!1 = !{!"version"}
+
+; Verify that the generated implicit.ref metadata nodes point to the correct global variables.
+; CHECK: ![[REF1]] = !{ptr @sccsid}
+; CHECK: ![[REF2]] = !{ptr @version}
\ No newline at end of file



More information about the llvm-branch-commits mailing list