[clang] [llvm] Add -funique-source-file-identifier option. (PR #142901)
Peter Collingbourne via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 5 10:51:43 PDT 2025
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/142901
From 74acb06bb339909bc2950cecb95eb61df49c0379 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Wed, 4 Jun 2025 22:37:09 -0700
Subject: [PATCH 1/3] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6-beta.1
---
clang/docs/UsersManual.rst | 17 ++++++++++++-----
clang/include/clang/Basic/CodeGenOptions.def | 2 --
clang/include/clang/Basic/CodeGenOptions.h | 4 ++++
clang/include/clang/Driver/Options.td | 16 +++++++++-------
clang/lib/CodeGen/CodeGenModule.cpp | 9 +++++++--
clang/lib/Driver/ToolChains/Clang.cpp | 10 ++++++++--
clang/test/CodeGen/unique-source-file-names.c | 5 +++--
clang/test/Driver/unique-source-file-names.c | 12 +++++++++---
llvm/lib/Transforms/Utils/ModuleUtils.cpp | 10 ++++++----
.../unique-source-file-names.ll | 3 ++-
10 files changed, 60 insertions(+), 28 deletions(-)
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 8c72f95b94095..62844f7e6a2fa 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2300,12 +2300,14 @@ are listed below.
.. option:: -f[no-]unique-source-file-names
When enabled, allows the compiler to assume that each object file
- passed to the linker has been compiled using a unique source file
- path. This is useful for reducing link times when doing ThinLTO
- in combination with whole-program devirtualization or CFI.
+ passed to the linker has a unique identifier. The identifier for
+ an object file is either the source file path or the value of the
+ argument `-funique-source-file-identifier` if specified. This is
+ useful for reducing link times when doing ThinLTO in combination with
+ whole-program devirtualization or CFI.
- The full source path passed to the compiler must be unique. This
- means that, for example, the following is a usage error:
+ The full source path or identifier passed to the compiler must be
+ unique. This means that, for example, the following is a usage error:
.. code-block:: console
@@ -2327,6 +2329,11 @@ are listed below.
A misuse of this flag may result in a duplicate symbol error at
link time.
+.. option:: -funique-source-file-identifier=IDENTIFIER
+
+ Used with `-funique-source-file-names` to specify a source file
+ identifier.
+
.. option:: -fforce-emit-vtables
In order to improve devirtualization, forces emitting of vtables even in
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index aad4e107cbeb3..fa9474d63ae42 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -278,8 +278,6 @@ CODEGENOPT(SanitizeCfiICallNormalizeIntegers, 1, 0) ///< Normalize integer types
///< CFI icall function signatures
CODEGENOPT(SanitizeCfiCanonicalJumpTables, 1, 0) ///< Make jump table symbols canonical
///< instead of creating a local jump table.
-CODEGENOPT(UniqueSourceFileNames, 1, 0) ///< Allow the compiler to assume that TUs
- ///< have unique source file names at link time
CODEGENOPT(SanitizeKcfiArity, 1, 0) ///< Embed arity in KCFI patchable function prefix
CODEGENOPT(SanitizeCoverageType, 2, 0) ///< Type of sanitizer coverage
///< instrumentation.
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 278803f7bb960..f6a6a7fcfa6d7 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -338,6 +338,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// -fsymbol-partition (see https://lld.llvm.org/Partitions.html).
std::string SymbolPartition;
+ /// If non-empty, allow the compiler to assume that the given source file
+ /// identifier is unique at link time.
+ std::string UniqueSourceFileIdentifier;
+
enum RemarkKind {
RK_Missing, // Remark argument not present on the command line.
RK_Enabled, // Remark enabled via '-Rgroup'.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5ca31c253ed8f..f04e214066ccb 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4204,13 +4204,15 @@ def ftrigraphs : Flag<["-"], "ftrigraphs">, Group<f_Group>,
def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group<f_Group>,
HelpText<"Do not process trigraph sequences">,
Visibility<[ClangOption, CC1Option]>;
-defm unique_source_file_names: BoolOption<"f", "unique-source-file-names",
- CodeGenOpts<"UniqueSourceFileNames">, DefaultFalse,
- PosFlag<SetTrue, [], [CC1Option], "Allow">,
- NegFlag<SetFalse, [], [], "Do not allow">,
- BothFlags<[], [ClangOption], " the compiler to assume that each translation unit has a unique "
- "source file name at link time">>,
- Group<f_clang_Group>;
+def funique_source_file_names: Flag<["-"], "funique-source-file-names">, Group<f_Group>,
+ HelpText<"Allow the compiler to assume that each translation unit has a unique "
+ "source file identifier (see funique-source-file-identifier) at link time">;
+def fno_unique_source_file_names: Flag<["-"], "fno-unique-source-file-names">;
+def unique_source_file_identifier_EQ: Joined<["-"], "funique-source-file-identifier=">, Group<f_Group>,
+ Visibility<[ClangOption, CC1Option]>,
+ HelpText<"Specify the source file identifier for -funique-source-file-names; "
+ "uses the source file path if not specified">,
+ MarshallingInfoString<CodeGenOpts<"UniqueSourceFileIdentifier">>;
def funsigned_bitfields : Flag<["-"], "funsigned-bitfields">, Group<f_Group>;
def funsigned_char : Flag<["-"], "funsigned-char">, Group<f_Group>;
def fno_unsigned_char : Flag<["-"], "fno-unsigned-char">;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 468fc6e0e5c56..4885965b35abb 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1146,8 +1146,13 @@ void CodeGenModule::Release() {
1);
}
- if (CodeGenOpts.UniqueSourceFileNames) {
- getModule().addModuleFlag(llvm::Module::Max, "Unique Source File Names", 1);
+ if (!CodeGenOpts.UniqueSourceFileIdentifier.empty()) {
+ getModule().addModuleFlag(
+ llvm::Module::Append, "Unique Source File Identifier",
+ llvm::MDTuple::get(
+ TheModule.getContext(),
+ llvm::MDString::get(TheModule.getContext(),
+ CodeGenOpts.UniqueSourceFileIdentifier)));
}
if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 13842b8cc2870..504d79461d534 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7740,8 +7740,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_late_parse_attributes,
options::OPT_fno_experimental_late_parse_attributes);
- Args.addOptInFlag(CmdArgs, options::OPT_funique_source_file_names,
- options::OPT_fno_unique_source_file_names);
+ if (Args.hasFlag(options::OPT_funique_source_file_names,
+ options::OPT_fno_unique_source_file_names, false)) {
+ if (Arg *A = Args.getLastArg(options::OPT_unique_source_file_identifier_EQ))
+ A->render(Args, CmdArgs);
+ else
+ CmdArgs.push_back(Args.MakeArgString(
+ Twine("-funique-source-file-identifier=") + Input.getBaseInput()));
+ }
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
diff --git a/clang/test/CodeGen/unique-source-file-names.c b/clang/test/CodeGen/unique-source-file-names.c
index 1d5a4a5e8e4c5..df8e3025870ae 100644
--- a/clang/test/CodeGen/unique-source-file-names.c
+++ b/clang/test/CodeGen/unique-source-file-names.c
@@ -1,2 +1,3 @@
-// RUN: %clang_cc1 -funique-source-file-names -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
-// CHECK: !{i32 7, !"Unique Source File Names", i32 1}
+// RUN: %clang_cc1 -funique-source-file-identifier=foo -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// CHECK: !{i32 5, !"Unique Source File Identifier", ![[MD:[0-9]*]]}
+// CHECK: ![[MD]] = !{!"foo"}
diff --git a/clang/test/Driver/unique-source-file-names.c b/clang/test/Driver/unique-source-file-names.c
index 8322f0e37b0c7..0dc71345d745c 100644
--- a/clang/test/Driver/unique-source-file-names.c
+++ b/clang/test/Driver/unique-source-file-names.c
@@ -1,5 +1,11 @@
// RUN: %clang -funique-source-file-names -### %s 2> %t
-// RUN: FileCheck < %t %s
+// RUN: FileCheck --check-prefix=SRC < %t %s
-// CHECK: "-cc1"
-// CHECK: "-funique-source-file-names"
+// SRC: "-cc1"
+// SRC: "-funique-source-file-identifier={{.*}}unique-source-file-names.c"
+
+// RUN: %clang -funique-source-file-names -funique-source-file-identifier=foo -### %s 2> %t
+// RUN: FileCheck --check-prefix=ID < %t %s
+
+// ID: "-cc1"
+// ID: "-funique-source-file-identifier=foo"
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 10efdd61d4553..596849ecab742 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/xxhash.h"
@@ -346,10 +347,11 @@ void llvm::filterDeadComdatFunctions(
std::string llvm::getUniqueModuleId(Module *M) {
MD5 Md5;
- auto *UniqueSourceFileNames = mdconst::extract_or_null<ConstantInt>(
- M->getModuleFlag("Unique Source File Names"));
- if (UniqueSourceFileNames && UniqueSourceFileNames->getZExtValue()) {
- Md5.update(M->getSourceFileName());
+ auto *UniqueSourceFileIdentifier = dyn_cast_or_null<MDNode>(
+ M->getModuleFlag("Unique Source File Identifier"));
+ if (UniqueSourceFileIdentifier) {
+ Md5.update(
+ cast<MDString>(UniqueSourceFileIdentifier->getOperand(0))->getString());
} else {
bool ExportsSymbols = false;
for (auto &GV : M->global_values()) {
diff --git a/llvm/test/Transforms/ThinLTOBitcodeWriter/unique-source-file-names.ll b/llvm/test/Transforms/ThinLTOBitcodeWriter/unique-source-file-names.ll
index 0f3fd566f9b1c..13dcefcb70cb5 100644
--- a/llvm/test/Transforms/ThinLTOBitcodeWriter/unique-source-file-names.ll
+++ b/llvm/test/Transforms/ThinLTOBitcodeWriter/unique-source-file-names.ll
@@ -19,4 +19,5 @@ define internal void @f() {
!0 = !{i32 0, !"typeid"}
!llvm.module.flags = !{!1}
-!1 = !{i32 1, !"Unique Source File Names", i32 1}
+!1 = !{i32 5, !"Unique Source File Identifier", !2}
+!2 = !{!"unique-source-file-names.c"}
From 0bd8438e7917753e0fc981940202acdc2383a9f8 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Wed, 4 Jun 2025 22:47:31 -0700
Subject: [PATCH 2/3] Format
Created using spr 1.3.6-beta.1
---
clang/include/clang/Basic/CodeGenOptions.h | 2 +-
clang/lib/Driver/ToolChains/Clang.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index f6a6a7fcfa6d7..a77232c281f7f 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -341,7 +341,7 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// If non-empty, allow the compiler to assume that the given source file
/// identifier is unique at link time.
std::string UniqueSourceFileIdentifier;
-
+
enum RemarkKind {
RK_Missing, // Remark argument not present on the command line.
RK_Enabled, // Remark enabled via '-Rgroup'.
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 504d79461d534..80dd72a23a673 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7741,7 +7741,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_experimental_late_parse_attributes);
if (Args.hasFlag(options::OPT_funique_source_file_names,
- options::OPT_fno_unique_source_file_names, false)) {
+ options::OPT_fno_unique_source_file_names, false)) {
if (Arg *A = Args.getLastArg(options::OPT_unique_source_file_identifier_EQ))
A->render(Args, CmdArgs);
else
From af30fa9cc364ed9e8ecbac9ec0da2002fcf80218 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Thu, 5 Jun 2025 10:51:29 -0700
Subject: [PATCH 3/3] Address comments
Created using spr 1.3.6-beta.1
---
clang/include/clang/Driver/Options.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f04e214066ccb..fd6deb22d404e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4206,7 +4206,7 @@ def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group<f_Group>,
Visibility<[ClangOption, CC1Option]>;
def funique_source_file_names: Flag<["-"], "funique-source-file-names">, Group<f_Group>,
HelpText<"Allow the compiler to assume that each translation unit has a unique "
- "source file identifier (see funique-source-file-identifier) at link time">;
+ "source file identifier (see -funique-source-file-identifier) at link time">;
def fno_unique_source_file_names: Flag<["-"], "fno-unique-source-file-names">;
def unique_source_file_identifier_EQ: Joined<["-"], "funique-source-file-identifier=">, Group<f_Group>,
Visibility<[ClangOption, CC1Option]>,
More information about the llvm-commits mailing list