[llvm-branch-commits] [clang] [llvm] [Clang][AIX] Add -mloadtime-comment-vars flag to preserve identifying variables (PR #187986)

Tony Varghese via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat May 23 13:08:18 PDT 2026


https://github.com/tonykuttai updated https://github.com/llvm/llvm-project/pull/187986

>From 7bf2d97109b21eed0bce6b0fd22a961dc62905aa Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Sat, 23 May 2026 14:54:05 -0400
Subject: [PATCH] [PowerPC][AIX] Add -mloadtime-comment-vars support to
 preserve variables in the final object file.

---
 clang/docs/LanguageExtensions.rst             |  67 +++++
 clang/include/clang/Basic/CodeGenOptions.h    |   3 +
 clang/include/clang/Options/Options.td        |   7 +
 clang/lib/CodeGen/CodeGenModule.cpp           |  77 ++++++
 clang/lib/CodeGen/CodeGenModule.h             |   8 +
 clang/lib/Driver/ToolChains/Clang.cpp         |   5 +
 clang/test/CodeGen/loadtime-comment-vars.c    |  37 +++
 .../Utils/LowerCommentStringPass.cpp          | 248 ++++++++++++------
 .../loadtime-comment-vars.ll                  |  34 +++
 9 files changed, 402 insertions(+), 84 deletions(-)
 create mode 100644 clang/test/CodeGen/loadtime-comment-vars.c
 create mode 100644 llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index bab0c299a85ee..0ee9b447b2b55 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -6874,6 +6874,73 @@ local ``internal`` global cannot be imported into another module without
 bringing that global along, which ThinLTO cannot do for ``internal``
 globals.
 
+Preserving Identifying Variables with -mloadtime-comment-vars
+--------------------------------------------------------------
+
+The ``-mloadtime-comment-vars=`` flag accepts a comma-separated list of
+global variable names that should be preserved in the final object file as
+loadtime identifying strings. This is an AIX-specific feature and is silently
+ignored on other targets.
+
+This flag complements ``#pragma comment(copyright, ...)`` for codebases that
+already use the traditional UNIX convention of embedding identifying strings
+directly in source variables, such as ``sccsid`` or ``version``, rather than
+via a pragma.
+
+**Syntax**
+
+.. code-block:: console
+
+  -mloadtime-comment-vars=<var1>[,<var2>,...]
+
+**Valid variable types**
+
+A variable named in the list must meet both of these conditions to be
+preserved:
+
+- Its type must be a character pointer (``char *``, ``const char *``) or a
+  character array (``char[]``).
+- It must have an initializer.
+
+Variables that fail either check -- for example, an ``int`` or a ``struct`` --
+are silently skipped. Variables that appear in the list but are not defined in
+the translation unit are also ignored.
+
+**Example**
+
+.. code-block:: c
+
+  static char *sccsid = "@(#) MyApp Version 1.0";
+  static char  version[] = "@(#) Built 2026-05-24";
+
+  void foo() {}
+
+Compiled with:
+
+.. code-block:: console
+
+  clang -target powerpc64-ibm-aix \
+    -mloadtime-comment-vars=sccsid,version \
+    -c source.c -o source.o
+
+Both ``sccsid`` and ``version`` survive optimization and garbage collection and
+are visible in the object file via standard AIX inspection tools:
+
+.. code-block:: console
+
+  $ what source.o
+  source.o:
+           MyApp Version 1.0
+           Built 2026-05-24
+
+**Interaction with** ``#pragma comment(copyright, ...)``
+
+The two mechanisms can be used together in the same translation unit. The
+pragma produces a dedicated ``__loadtime_comment_str`` symbol placed in the
+``__loadtime_comment`` section, while ``-mloadtime-comment-vars`` preserves
+the named source variables in place using ``.ref`` directives. Both sets of
+strings appear in the final object file independently.
+
 Evaluating Object Size
 ======================
 
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index e43112b4bb98b..54b2fd2077d7b 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -334,6 +334,9 @@ class CodeGenOptions : public CodeGenOptionsBase {
   /// A list of linker options to embed in the object file.
   std::vector<std::string> LinkerOptions;
 
+  /// List of global variable names to preserve as loadtime comment variables.
+  std::vector<std::string> LoadTimeCommentVars;
+
   /// Name of the profile file to use as output for -fprofile-instr-generate,
   /// -fprofile-generate, and -fcs-profile-generate.
   std::string InstrProfileOutput;
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 753e3ac1b74a5..ae800711a2612 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -4758,6 +4758,13 @@ def fvisibility_global_new_delete_EQ : Joined<["-"], "fvisibility-global-new-del
   Visibility<[ClangOption, CC1Option]>,
   HelpText<"The visibility for global C++ operator new and delete declarations. If 'source' is specified the visibility is not adjusted">,
   MarshallingInfoVisibilityGlobalNewDelete<LangOpts<"GlobalAllocationFunctionVisibility">, "ForceDefault">;
+def mloadtime_comment_vars_EQ
+    : CommaJoined<["-"], "mloadtime-comment-vars=">,
+      Group<m_Group>,
+      Visibility<[ClangOption, CC1Option]>,
+      HelpText<"Comma-separated list of global variable names to treat as "
+               "loadtime variables">,
+      MarshallingInfoStringVector<CodeGenOpts<"LoadTimeCommentVars">>;
 def mdefault_visibility_export_mapping_EQ : Joined<["-"], "mdefault-visibility-export-mapping=">,
   Values<"none,explicit,all">,
   NormalizedValuesScope<"LangOptions::DefaultVisiblityExportMapping">,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8630e000c59d0..7a389f6e34cc3 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -78,6 +78,7 @@
 #include "llvm/Transforms/Instrumentation/KCFI.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/KCFIHash.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include <optional>
 #include <set>
 
@@ -1738,6 +1739,9 @@ void CodeGenModule::Release() {
 
   EmitLoadTimeComment();
 
+  // Emit loadtime comment variables specified via -mloadtime-comment-vars.
+  EmitLoadTimeCommentVars();
+
   // If there is device offloading code embed it in the host now.
   EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags());
 
@@ -4237,6 +4241,79 @@ void CodeGenModule::EmitLoadTimeComment() {
   }
 }
 
+/// Decide whether \p D may be treated as a loadtime comment variable.
+/// Accepted declarations have an initializer and a type that is either a
+/// pointer to a character type or an array of a character type; anything
+/// else is rejected.
+bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const {
+  // Without a declaration or an initializer there is no string to keep.
+  if (!D || !D->hasInit())
+    return false;
+
+  const QualType VarTy = D->getType();
+
+  // Pointer form, e.g. `char *sccsid` or `const char *copyright`.
+  const auto *PtrTy = VarTy->getAs<PointerType>();
+  if (PtrTy && PtrTy->getPointeeType()->isAnyCharacterType())
+    return true;
+
+  // Array form, e.g. `char version[]`; the array type is obtained through
+  // ASTContext::getAsArrayType.
+  const ArrayType *ArrTy = getContext().getAsArrayType(VarTy);
+  if (ArrTy && ArrTy->getElementType()->isAnyCharacterType())
+    return true;
+
+  // Everything else (ints, structs, ...) is rejected.
+  return false;
+}
+
+/// Emit global variables specified via -mloadtime-comment-vars as loadtime
+/// comment variables. Each requested variable that passes
+/// isValidLoadTimeCommentVariable() is emitted, added to llvm.compiler.used,
+/// and has its IR name recorded in !loadtime_comment.vars. AIX only.
+void CodeGenModule::EmitLoadTimeCommentVars() {
+  const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars;
+  if (!getTriple().isOSAIX() || LoadTimeCommentVars.empty())
+    return;
+
+  TranslationUnitDecl *TU = getContext().getTranslationUnitDecl();
+  for (auto *D : TU->decls()) {
+    VarDecl *VD = dyn_cast<VarDecl>(D);
+    if (!VD)
+      continue;
+
+    // Skip variables that were not requested or have an unsupported type.
+    if (!llvm::is_contained(LoadTimeCommentVars, VD->getName()) ||
+        !isValidLoadTimeCommentVariable(VD))
+      continue;
+
+    llvm::Constant *Addr = GetAddrOfGlobalVar(VD);
+    auto *GV = dyn_cast<llvm::GlobalVariable>(Addr->stripPointerCasts());
+    if (!GV)
+      continue;
+
+    // Force Clang to emit the definition if it skipped it. Emission may
+    // replace the declaration with a new global (e.g. when the initializer's
+    // IR type differs), so look the variable up again instead of reusing GV.
+    if (GV->isDeclaration()) {
+      EmitGlobalDefinition(VD);
+      GV = dyn_cast<llvm::GlobalVariable>(
+          GetAddrOfGlobalVar(VD)->stripPointerCasts());
+    }
+    if (!GV || GV->isDeclaration())
+      continue;
+
+    // Record the IR symbol name, not the source name: the lowering pass looks
+    // it up with Module::getNamedGlobal(), and the two differ when mangled.
+    llvm::NamedMDNode *MD =
+        getModule().getOrInsertNamedMetadata("loadtime_comment.vars");
+    llvm::Metadata *Ops[] = {
+        llvm::MDString::get(getLLVMContext(), GV->getName())};
+    MD->addOperand(llvm::MDNode::get(getLLVMContext(), Ops));
+    llvm::appendToCompilerUsed(getModule(), {GV});
+  }
+}
+
 bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
   // In OpenMP 5.0 variables and function may be marked as
   // device_type(host/nohost) and we should not emit them eagerly unless we sure
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 44b816084316a..a041184c7828c 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -2172,6 +2172,14 @@ class CodeGenModule : public CodeGenTypeCache {
   /// Emit the load-time comment metadata (e.g., from
   /// #pragma comment(copyright, ...)) for the translation unit.
   void EmitLoadTimeComment();
+
+  /// Check if a variable declaration is suitable to be treated as a loadtime
+  /// comment variable (must be a character pointer or array with initializer).
+  bool isValidLoadTimeCommentVariable(const VarDecl *D) const;
+
+  /// Emit global variables specified via -mloadtime-comment-vars as loadtime
+  /// comment variables, tagging them with metadata and preventing removal.
+  void EmitLoadTimeCommentVars();
 };
 
 }  // end namespace CodeGen
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 8d8e00bbaf7d0..4d3aebcc1a2a8 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6180,6 +6180,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   else if (UnwindTables)
      CmdArgs.push_back("-funwind-tables=1");
 
+  // Forward loadtime-comment vars option to cc1.
+  if (Arg *A = Args.getLastArg(options::OPT_mloadtime_comment_vars_EQ)) {
+    A->render(Args, CmdArgs);
+  }
+
   // Sframe unwind tables are independent of the other types. Although also
   // defined for aarch64, only x86_64 support is implemented at the moment.
   if (Arg *A = Args.getLastArg(options::OPT_gsframe)) {
diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c
new file mode 100644
index 0000000000000..99c7fd7cc50d4
--- /dev/null
+++ b/clang/test/CodeGen/loadtime-comment-vars.c
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -O2 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// String pointer 
+static char *sccsid = "@(#) sccsid Version 1.0";
+
+// String array 
+static char version[] = "@(#) Copyright Version 2.0";
+
+// Const string (Not in CLI list, should NOT be emitted)
+static const char *copyright = "@(#) Copyright 2026";
+
+// Integer (In CLI list but invalid type, should NOT be emitted)
+static int build_number = 12345;
+
+// Struct (not in CLI list and invalid type, NOT emitted)
+struct build_info {
+    int major;
+    int minor;
+} static build_data = {1, 0};
+
+void foo() {}
+
+// CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}
+// CHECK: @.str = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align {{[0-9]+}}
+// CHECK: @version = internal global [27 x i8] c"@(#) Copyright Version 2.0\00", align {{[0-9]+}}
+// CHECK: @llvm.compiler.used = appending global [2 x ptr] [ptr @sccsid, ptr @version], section "llvm.metadata"
+
+// Ensure unrequested/invalid variables are not emitted
+// CHECK-NOT: @copyright
+// CHECK-NOT: @build_number
+// CHECK-NOT: @build_data
+
+// Verify named metadata contains the preserved variable names
+// CHECK: !loadtime_comment.vars = !{![[MD_SCC:[0-9]+]], ![[MD_VER:[0-9]+]]}
+// CHECK: ![[MD_SCC]] = !{!"sccsid"}
+// CHECK: ![[MD_VER]] = !{!"version"}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
index cc9bee494d597..1d42a553b5680 100644
--- a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
+++ b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp
@@ -4,40 +4,70 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+//
+// This pass processes copyright and variable metadata for AIX, handling two
+// distinct mechanisms:
+//
+// 1. #pragma comment(copyright, "...") - TU-wide copyright strings
+// 2. -mloadtime-comment-vars=<names> - User-specified global variables
+//
+// Both types of information must be preserved in the final object file and
+// survive optimization passes including DCE and LTO.
 //
-// This pass lowers the module-level comment string metadata emitted by Clang:
+// === #pragma comment(copyright, "...") ===
+//
+// Clang emits module-level metadata for copyright pragmas:
 //
 //     !comment_string.loadtime = !{!"Copyright ..."}
 //
-// into concrete, translation-unit-local globals.
-// This Pass is enabled only for AIX.
-// For each module (translation unit), the pass performs the following:
+// This pass materializes the metadata into a concrete global variable:
 //
 //   1. Creates a null-terminated, internal constant string global
-//      (`__loadtime_comment_str`) containing the copyright text with
-//      section attribute "__loadtime_comment". The backend places this
-//      in the .text section of the object file.
-//
-//   2. Marks the string in `llvm.compiler.used` so it cannot be dropped by
-//      optimization or LTO.
-//
-//   3. Attaches `!implicit.ref` metadata referencing the string to every
-//      defined function in the module. The PowerPC AIX backend recognizes
-//      this metadata and emits a `.ref` directive from the function to the
-//      string, creating a concrete relocation that prevents the linker from
-//      discarding the string (as long as the referencing symbol is kept).
-//
-//  Input IR:
-//     !comment_string.loadtime = !{!"Copyright"}
-//  Output IR:
-//     @__loadtime_comment_str = internal constant [N x i8] c"Copyright\00",
-//                          section "__loadtime_comment"
-//     @llvm.compiler.used = appending global [1 x ptr] [ptr
-//     @__loadtime_comment_str]
-//
-//     define i32 @func() !implicit.ref !5 { ... }
-//     !5 = !{ptr @__loadtime_comment_str}
+//      `__loadtime_comment_str` containing the copyright text with section
+//      attribute "__loadtime_comment". The backend emits this to a special
+//      section in the object file.
+//
+//   2. Marks the global in `llvm.compiler.used` to prevent removal by
+//      optimization passes.
+//
+//   3. Attaches `!implicit.ref` metadata to every defined function,
+//      referencing the global. The PowerPC AIX backend emits a `.ref`
+//      directive for each reference, creating relocations that prevent the
+//      linker from discarding the string.
+//
+// === -mloadtime-comment-vars=<names> ===
+//
+// Clang records the names of user-specified global variables (e.g., char
+// *sccsid, char version[]) in module-level named metadata:
+//
+//     !loadtime_comment.vars = !{!{!"sccsid"}, !{!"version"}}
+//
+// This pass:
+//
+//   1. Looks up each recorded name with Module::getNamedGlobal() and adds
+//      every defined global it finds to `llvm.compiler.used`.
+//
+//   2. Attaches `!implicit.ref` metadata to every defined function,
+//      referencing each such global, then erases the named metadata.
+//
+// === Output Example ===
+//
+// Input IR:
+//     !comment_string.loadtime = !{!"Copyright 2026"}
+//     !loadtime_comment.vars = !{!{!"sccsid"}}
+//     @sccsid = internal global ptr @.str
+//
+// Output IR:
+//     @__loadtime_comment_str = internal constant [15 x i8]
+//                               c"Copyright 2026\00",
+//                               section "__loadtime_comment"
+//     @llvm.compiler.used = appending global [2 x ptr]
+//                           [ptr @__loadtime_comment_str, ptr @sccsid]
+//
+//     define i32 @func() !implicit.ref !1 !implicit.ref !2 { ... }
+//     !1 = !{ptr @__loadtime_comment_str}
+//     !2 = !{ptr @sccsid}
 //
 //===----------------------------------------------------------------------===//
 
@@ -83,68 +113,118 @@ PreservedAnalyses LowerCommentStringPass::run(Module &M,
 
   LLVMContext &Ctx = M.getContext();
 
-  // Single-metadata: !comment_string.loadtime = !{!0}
-  // Each operand node is expected to have one MDString operand.
+  // This pass processes two types of copyright/identifying information:
+  // 1. A single TU-wide copyright string from #pragma comment(copyright, "...")
+  // 2. Multiple user-specified variables from -mloadtime-comment-vars=...
+  //
+  // Both need implicit references from every function to survive DCE and LTO.
+  // Collect all copyright globals, then create implicit references
+  // from every function definition to each global. This forces the backend
+  // to treat them as reachable and preserve them in the final object file.
+  SmallVector<GlobalValue *, 4> CopyrightGlobals;
+
+  // =========================================================================
+  // Process #pragma comment(copyright, "...") - at most one per TU
+  // =========================================================================
+  // Frontend emits module-level metadata:
+  //   !comment_string.loadtime = !{!0}
+  //   !0 = !{!"Copyright text here"}
+  //
+  // We materialize this as a global string in the __loadtime_comment section,
+  // which linkers recognize and include in the object file's loadtime
+  // comment area.
   NamedMDNode *MD = M.getNamedMetadata("comment_string.loadtime");
-  if (!MD || MD->getNumOperands() == 0)
-    return PreservedAnalyses::all();
-
-  // At this point we are guaranteed that one TU contains a single copyright
-  // metadata entry. Create TU-local string global for that metadata entry.
-  MDNode *MdNode = MD->getOperand(0);
-  if (!MdNode || MdNode->getNumOperands() == 0)
-    return PreservedAnalyses::all();
-
-  auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0));
-  if (!MdString)
-    return PreservedAnalyses::all();
-
-  StringRef Text = MdString->getString();
-  if (Text.empty())
-    return PreservedAnalyses::all();
-
-  // 1. Create a single null-terminated string global.
-  Constant *StrInit = ConstantDataArray::getString(Ctx, Text, /*AddNull=*/true);
-
-  // The global variable should be internal, constant, and TU-local.
-  // This avoids duplicate symbol issues across TUs.
-  auto *StrGV = new GlobalVariable(M, StrInit->getType(),
-                                   /*isConstant=*/true,
-                                   GlobalValue::InternalLinkage, StrInit,
-                                   /*Name=*/"__loadtime_comment_str");
-  // Set unnamed_addr to allow the linker to merge identical strings.
-  StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-  StrGV->setAlignment(Align(1));
-  // Place in the "__loadtime_comment" section.
-  // The GV is constant, so we expect a read-only section.
-  StrGV->setSection("__loadtime_comment");
-
-  // 2. Add the string to llvm.compiler.used to prevent LLVM optimization/LTO
-  // passes from removing it.
-  appendToCompilerUsed(M, {StrGV});
-
-  // 3. Attach !implicit.ref metadata to every defined function.
-  // Create a metadata node pointing to the copyright string:
-  //   !N = !{ptr @__loadtime_comment_str}
-  Metadata *Ops[] = {ConstantAsMetadata::get(StrGV)};
-  MDNode *ImplicitRefMD = MDNode::get(Ctx, Ops);
-
-  auto AddImplicitRef = [&](Function &F) {
+  if (MD && MD->getNumOperands() > 0) {
+    MDNode *MdNode = MD->getOperand(0);
+    if (MdNode && MdNode->getNumOperands() > 0) {
+      auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0));
+      if (MdString && !MdString->getString().empty()) {
+        StringRef Text = MdString->getString();
+        // Create a null-terminated string constant in the special section.
+        Constant *StrInit =
+            ConstantDataArray::getString(Ctx, Text, /*AddNull*/ true);
+        // The global variable should be internal, constant, and TU-local.
+        // This avoids duplicate symbol issues across TUs.
+        auto *StrGV = new GlobalVariable(M, StrInit->getType(),
+                                         /*isConstant=*/true,
+                                         GlobalValue::InternalLinkage, StrInit,
+                                         /*Name=*/"__loadtime_comment_str");
+        StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+        StrGV->setAlignment(Align(1));
+        // Backend recognizes this section and emits it to .loadtime_comment.
+        StrGV->setSection("__loadtime_comment");
+        // Prevent removal by optimizer passes (but not sufficient for linker).
+        appendToCompilerUsed(M, {StrGV});
+        // Add to list - will get implicit refs from all functions below.
+        CopyrightGlobals.push_back(StrGV);
+      }
+    }
+    // Clean up the metadata as we have consumed it.
+    MD->eraseFromParent();
+  }
+
+  // =========================================================================
+  // Process -mloadtime-comment-vars=sccsid,version,... (CLI flag)
+  // =========================================================================
+  // Frontend stores variable names in named metadata:
+  //   !loadtime_comment.vars = !{!{!"sccsid"}, !{!"version"}}
+  //
+  // We look each name up by M.getNamedGlobal() rather than walking globals
+  // looking for per-global metadata, because per-global metadata is droppable
+  // and may be stripped by optimization passes before this pass runs.
+  NamedMDNode *VarsMD = M.getNamedMetadata("loadtime_comment.vars");
+  if (VarsMD) {
+    for (unsigned I = 0, E = VarsMD->getNumOperands(); I < E; ++I) {
+      MDNode *Entry = VarsMD->getOperand(I);
+      if (!Entry || Entry->getNumOperands() == 0)
+        continue;
+
+      auto *VarName = dyn_cast_or_null<MDString>(Entry->getOperand(0));
+      if (!VarName || VarName->getString().empty())
+        continue;
+
+      GlobalValue *GV = M.getNamedGlobal(VarName->getString());
+      if (!GV || GV->isDeclaration())
+        continue;
+
+      appendToCompilerUsed(M, {GV});
+
+      CopyrightGlobals.push_back(GV);
+    }
+    VarsMD->eraseFromParent();
+  }
+
+  // =========================================================================
+  // Create implicit references from every function to each global
+  // =========================================================================
+  // Each implicit.ref node references exactly ONE global. Multiple nodes
+  // can be attached to a single function (e.g., !implicit.ref !1, !implicit.ref
+  // !2).
+  auto AddImplicitRef = [&](Function &F, GlobalValue *GV) {
     if (F.isDeclaration())
       return;
-    // Attach the !implicit.ref metadata to the function.
-    F.setMetadata(LLVMContext::MD_implicit_ref, ImplicitRefMD);
-    LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function:  "
-                      << F.getName() << "\n");
+    // Create metadata: !N = !{ptr @global_variable}
+    Metadata *Ops[] = {ConstantAsMetadata::get(GV)};
+    MDNode *NewMD = MDNode::get(Ctx, Ops);
+    // Attach to function - addMetadata allows multiple !implicit.ref nodes per
+    // function, one for each copyright global.
+    F.addMetadata(LLVMContext::MD_implicit_ref, *NewMD);
+
+    LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: "
+                      << F.getName() << " for global: " << GV->getName()
+                      << "\n");
   };
 
-  // Process all functions in the module and add !implicit.ref to the function.
-  for (Function &F : M)
-    AddImplicitRef(F);
+  // Apply implicit references: for each global, mark all functions as users.
+  if (!CopyrightGlobals.empty()) {
+    for (GlobalValue *GV : CopyrightGlobals) {
+      for (Function &F : M)
+        AddImplicitRef(F, GV);
+    }
+  }
 
-  // Cleanup the processed metadata.
-  MD->eraseFromParent();
-  LLVM_DEBUG(dbgs() << "[copyright] created string and anchor for module\n");
+  LLVM_DEBUG(dbgs() << "[copyright] processed " << CopyrightGlobals.size()
+                    << " copyright globals\n");
 
   return PreservedAnalyses::all();
 }
diff --git a/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll
new file mode 100644
index 0000000000000..3dd32f652023a
--- /dev/null
+++ b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll
@@ -0,0 +1,34 @@
+; RUN: opt -passes=lower-comment-string -S < %s | FileCheck %s
+
+target triple = "powerpc64-ibm-aix"
+
+ at sccsid = internal global ptr @.str, align 8
+ at .str = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align 1
+ at version = internal global [27 x i8] c"@(#) Copyright Version 2.0\00", align 1
+ at llvm.compiler.used = appending global [2 x ptr] [ptr @sccsid, ptr @version], section "llvm.metadata"
+
+define void @foo() {
+entry:
+  ret void
+}
+
+define void @bar() {
+entry:
+  ret void
+}
+
+!loadtime_comment.vars = !{!1, !2}
+!1 = !{!"sccsid"}
+!2 = !{!"version"}
+
+; CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}
+; CHECK: @.str = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align {{[0-9]+}}
+; CHECK: @version = internal global [27 x i8] c"@(#) Copyright Version 2.0\00", align {{[0-9]+}}
+; CHECK: @llvm.compiler.used = appending global [2 x ptr] [ptr @sccsid, ptr @version], section "llvm.metadata"
+
+; CHECK: define void @foo() !implicit.ref ![[REF1:[0-9]+]] !implicit.ref ![[REF2:[0-9]+]] {
+; CHECK: define void @bar() !implicit.ref ![[REF1]] !implicit.ref ![[REF2]] {
+
+; Verify that the generated implicit.ref metadata nodes point to the correct global variables.
+; CHECK: ![[REF1]] = !{ptr @sccsid}
+; CHECK: ![[REF2]] = !{ptr @version}



More information about the llvm-branch-commits mailing list